Skip to content

Commit e9d3cd7

Browse files
HIVE-29328: Orc acid footer metadata should be case insensitive (#6203)
1 parent d9ec041 commit e9d3cd7

File tree

3 files changed

+53
-10
lines changed

3 files changed

+53
-10
lines changed

ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@
1818

1919
package org.apache.hadoop.hive.ql.io.orc;
2020

21-
import org.apache.commons.collections.CollectionUtils;
21+
import org.apache.commons.collections4.CollectionUtils;
22+
import org.apache.commons.collections4.Equator;
2223
import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
2324
import org.apache.hadoop.hive.common.BlobStorageUtils;
2425
import org.apache.hadoop.hive.common.NoDynamicValuesException;
@@ -29,6 +30,7 @@
2930
import java.security.PrivilegedExceptionAction;
3031
import java.util.ArrayList;
3132
import java.util.Arrays;
33+
import java.util.Collection;
3234
import java.util.Collections;
3335
import java.util.HashMap;
3436
import java.util.HashSet;
@@ -378,8 +380,7 @@ public static RecordReader createReaderFromFile(Reader file,
378380
* @return <code>false</code> if an ACID file, <code>true</code> if a simple orc file
379381
*/
380382
public static boolean isOriginal(Reader file) {
381-
return !CollectionUtils.isEqualCollection(file.getSchema().getFieldNames(),
382-
OrcRecordUpdater.ALL_ACID_ROW_NAMES);
383+
return !checkIfAcidRowNamesFilled(file.getSchema().getFieldNames());
383384
}
384385

385386
/**
@@ -388,8 +389,21 @@ public static boolean isOriginal(Reader file) {
388389
* @return <code>false</code> if an ACID file, <code>true</code> if a simple orc file
389390
*/
390391
public static boolean isOriginal(Footer footer) {
391-
return !CollectionUtils.isEqualCollection(footer.getTypesList().get(0).getFieldNamesList(),
392-
OrcRecordUpdater.ALL_ACID_ROW_NAMES);
392+
return !checkIfAcidRowNamesFilled(footer.getTypesList().getFirst().getFieldNamesList());
393+
}
394+
395+
private static boolean checkIfAcidRowNamesFilled(Collection<String> fieldNames) {
396+
return CollectionUtils.isEqualCollection(OrcRecordUpdater.ALL_ACID_ROW_NAMES, fieldNames, new Equator<>() {
397+
@Override
398+
public boolean equate(String s, String t1) {
399+
return s.equalsIgnoreCase(t1);
400+
}
401+
402+
@Override
403+
public int hash(String s) {
404+
return 0;
405+
}
406+
});
393407
}
394408

395409
public static boolean[] genIncludedColumns(TypeDescription readerSchema,

ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ public class OrcRecordUpdater implements RecordUpdater {
9292
static final String ROW_ID_FIELD_NAME = "rowId";
9393
static final String CURRENT_WRITEID_FIELD_NAME = "currentTransaction";
9494
static final String ROW_FIELD_NAME = "row";
95-
public static final Collection ALL_ACID_ROW_NAMES = Arrays.asList(
95+
public static final Collection<String> ALL_ACID_ROW_NAMES = Arrays.asList(
9696
OrcRecordUpdater.BUCKET_FIELD_NAME,
9797
OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME,
9898
OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME,

ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java

Lines changed: 33 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,10 @@
4545

4646
public class TestFixAcidKeyIndex {
4747
public final static Logger LOG = LoggerFactory.getLogger(TestFixAcidKeyIndex.class);
48+
final static String typeStr = "struct<operation:int," +
49+
"originalTransaction:bigint,bucket:int,rowId:bigint," +
50+
"currentTransaction:bigint," +
51+
"row:struct<a:int,b:struct<c:int>,d:string>>";
4852

4953
@Rule
5054
public TestName testCaseName = new TestName();
@@ -72,12 +76,15 @@ static abstract class TestKeyIndexBuilder
7276
}
7377

7478
void createTestAcidFile(Path path, int numRows, TestKeyIndexBuilder indexBuilder) throws Exception {
79+
createTestAcidFile(path, numRows, indexBuilder, typeStr);
80+
}
81+
82+
void createTestAcidFile(Path path,
83+
int numRows,
84+
TestKeyIndexBuilder indexBuilder,
85+
String typeStr) throws Exception {
7586
FileSystem fs = path.getFileSystem(conf);
7687
fs.delete(path, true);
77-
String typeStr = "struct<operation:int," +
78-
"originalTransaction:bigint,bucket:int,rowId:bigint," +
79-
"currentTransaction:bigint," +
80-
"row:struct<a:int,b:struct<c:int>,d:string>>";
8188
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr);
8289
Writer writer = OrcFile.createWriter(path,
8390
OrcFile.writerOptions(conf)
@@ -219,6 +226,28 @@ public void testValidKeyIndex() throws Exception {
219226
fixValidIndex(testFilePath);
220227
}
221228

229+
@Test
230+
public void testValidKeyIndexWithAcidMetadataLowerCase() throws Exception {
231+
String lowerCaseTypeStr = typeStr.toLowerCase();
232+
// Try with 0 row file.
233+
createTestAcidFile(testFilePath, 0, new GoodKeyIndexBuilder(), lowerCaseTypeStr);
234+
checkValidKeyIndex(testFilePath);
235+
// Attempting to fix a valid - should not result in a new file.
236+
fixValidIndex(testFilePath);
237+
238+
// Try single stripe
239+
createTestAcidFile(testFilePath, 100, new GoodKeyIndexBuilder(), lowerCaseTypeStr);
240+
checkValidKeyIndex(testFilePath);
241+
// Attempting to fix a valid - should not result in a new file.
242+
fixValidIndex(testFilePath);
243+
244+
// Multiple stripes
245+
createTestAcidFile(testFilePath, 12000, new GoodKeyIndexBuilder(), lowerCaseTypeStr);
246+
checkValidKeyIndex(testFilePath);
247+
// Attempting to fix a valid - should not result in a new file.
248+
fixValidIndex(testFilePath);
249+
}
250+
222251
@Test
223252
public void testInvalidKeyIndex() throws Exception {
224253
// Try single stripe

0 commit comments

Comments
 (0)