From 90033cfd1b0cb38aa85a937ef973fe6511949036 Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Tue, 16 Dec 2025 23:42:39 +0530 Subject: [PATCH 1/2] HIVE-29375: FULL OUTER JOIN is failing with Unexpected hash table key type DATE --- ...torMapJoinOuterGenerateResultOperator.java | 1 + .../VectorMapJoinOptimizedLongHashMap.java | 6 + .../vector_full_outer_join_date.q | 29 ++++ .../llap/vector_full_outer_join_date.q.out | 149 ++++++++++++++++++ 4 files changed, 185 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/vector_full_outer_join_date.q create mode 100644 ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index fff2f28a097f..e83b178e4dc7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -824,6 +824,7 @@ protected void generateFullOuterSmallTableNoMatches(byte smallTablePos, case SHORT: case INT: case LONG: + case DATE: generateFullOuterLongKeySmallTableNoMatches(); break; case STRING: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index 65c51270b8e6..cafd8326e1b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -89,6 +89,9 @@ public void init() { case LONG: integerTypeInfo = TypeInfoFactory.longTypeInfo; break; + case DATE: + integerTypeInfo = TypeInfoFactory.dateTypeInfo; + break; default: throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); } @@ -123,6 +126,9 @@ private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveExceptio case LONG: longValue = keyBinarySortableDeserializeRead.currentLong; break; + case DATE: + longValue = keyBinarySortableDeserializeRead.currentDateWritable.getDays(); + break; default: throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); } diff --git a/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q new file mode 100644 index 000000000000..ba9645e50b55 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q @@ -0,0 +1,29 @@ +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join=true; + +-- Test Date column +create table tbl1 (id int, event_date date); +create table tbl2 (id int, event_date date); + +insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03'); +insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05'); + +select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id; + +-- Test timestamp column +create table tbl3 (id int, event_date timestamp); +create table tbl4 (id int, event_date timestamp); + +insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30'); +insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30'); + +select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id; + +-- Test Double column +create table tbl5 (id int, val double); +create table tbl6 (id int, val double); + +insert into tbl5 values (1, 5.6D), (2, 3.2D); +insert into tbl6 values (2, 3.2D), (3, 7.2D); + +select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id; diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out new file mode 100644 index 000000000000..1c61ff45eede --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -0,0 +1,149 @@ +PREHOOK: query: create table tbl1 (id int, event_date date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: create table tbl1 (id int, event_date date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: create table tbl2 (id int, event_date date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: create table tbl2 (id int, event_date date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl1 +POSTHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.event_date SCRIPT [] +POSTHOOK: Lineage: tbl1.id SCRIPT [] +PREHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl2 +POSTHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.event_date SCRIPT [] +POSTHOOK: Lineage: tbl2.id SCRIPT [] +PREHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +1 2023-01-01 +2 2023-01-02 +3 2023-01-03 +NULL NULL +NULL NULL +PREHOOK: query: create table tbl3 (id int, event_date timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl3 +POSTHOOK: query: create table tbl3 (id int, event_date timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl3 +PREHOOK: query: create table tbl4 (id int, event_date timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl4 +POSTHOOK: query: create table tbl4 (id int, event_date timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl4 +PREHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl3 +POSTHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl3 +POSTHOOK: Lineage: tbl3.event_date SCRIPT [] +POSTHOOK: Lineage: tbl3.id SCRIPT [] +PREHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl4 +POSTHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl4 +POSTHOOK: Lineage: tbl4.event_date SCRIPT [] +POSTHOOK: Lineage: tbl4.id SCRIPT [] +PREHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl3 +PREHOOK: Input: default@tbl4 +#### A masked pattern was here #### +POSTHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl3 +POSTHOOK: Input: default@tbl4 +#### A masked pattern was here #### +1 2025-12-17 10:20:30 +2 2025-12-17 11:20:30 +NULL NULL +PREHOOK: query: create table tbl5 (id int, val double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl5 +POSTHOOK: query: create table tbl5 (id int, val double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl5 +PREHOOK: query: create table tbl6 (id int, val double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl6 +POSTHOOK: query: create table tbl6 (id int, val double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl6 +PREHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl5 +POSTHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl5 +POSTHOOK: Lineage: tbl5.id SCRIPT [] +POSTHOOK: Lineage: tbl5.val SCRIPT [] +PREHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl6 +POSTHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl6 +POSTHOOK: Lineage: tbl6.id SCRIPT [] +POSTHOOK: Lineage: tbl6.val SCRIPT [] +PREHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl5 +PREHOOK: Input: default@tbl6 +#### A masked pattern was here #### +POSTHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl5 +POSTHOOK: Input: default@tbl6 +#### A masked pattern was here #### +1 5.6 +2 3.2 +NULL NULL From b2674e5b39386a2bc4b287cd6e9892d801a6274a Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Mon, 22 Dec 2025 14:22:28 +0530 Subject: [PATCH 2/2] Add DATE support in test code as well --- .../VectorMapJoinOptimizedLongHashMap.java | 4 +- .../vector/mapjoin/MapJoinTestConfig.java | 4 ++ .../mapjoin/fast/CheckFastRowHashMap.java | 5 +++ .../fast/TestVectorMapJoinFastRowHashMap.java | 41 +++++++++++++++++++ 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index cafd8326e1b0..aeaab826898c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -90,8 +90,8 @@ public void init() { integerTypeInfo = TypeInfoFactory.longTypeInfo; break; case DATE: - integerTypeInfo = TypeInfoFactory.dateTypeInfo; - break; + integerTypeInfo = TypeInfoFactory.dateTypeInfo; + break; default: throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index e4674d81efc5..8597229c3e3a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -394,6 +394,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t case LONG: hashTableKeyType = HashTableKeyType.LONG; break; + case DATE: + hashTableKeyType = HashTableKeyType.DATE; + break; case STRING: hashTableKeyType = HashTableKeyType.STRING; break; @@ -547,6 +550,7 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( case BYTE: case SHORT: case INT: + case DATE: case LONG: switch (VectorMapJoinVariation) { case INNER: diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java index 5a9f180b3f39..e0d387718b53 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.VerifyLazy; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; @@ -335,6 +336,7 @@ public void verify(VectorMapJoinFastHashTableContainerBase map, case SHORT: case INT: case LONG: + case DATE: { Object[] keyRow = element.getKeyRow(); Object keyObject = keyRow[0]; @@ -357,6 +359,9 @@ public void verify(VectorMapJoinFastHashTableContainerBase map, case LONG: longKey = ((LongWritable) keyObject).get(); break; + case DATE: + longKey = ((DateWritableV2) keyObject).getDays(); + break; default: throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java index f5eb68c6ba7b..0a751728cc79 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java @@ -495,6 +495,47 @@ public void testBigIntRowsExact() throws Exception { /* doClipping */ false, /* useExactBytes */ true); } + @Test + public void testDateRowsExact() throws Exception { + random = new Random(44332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMapContainer map = + new VectorMapJoinFastLongHashMapContainer( + false, + false, + HashTableKeyType.DATE, + LARGE_CAPACITY, + LOAD_FACTOR, + LARGE_WB_SIZE, + -1, + tableDesc, + 4); + + VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap(); + VectorRandomRowSource valueSource = new VectorRandomRowSource(); + + valueSource.init( + random, + VectorRandomRowSource.SupportedTypes.ALL, + 4, + /* allowNulls */ false, /* isUnicodeOk */ + false); + + int rowCount = 1000; + Object[][] rows = valueSource.randomRows(rowCount); + + addAndVerifyRows( + valueSource, + rows, + map, + HashTableKeyType.DATE, + verifyTable, + new String[] {"date"}, + /* doClipping */ false, /* useExactBytes */ + true); + } + @Test public void testIntRowsExact() throws Exception { random = new Random(8238383);