diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java new file mode 100644 index 0000000000..f034f37430 --- /dev/null +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/PrimitiveToBoxedClass.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.filter2.predicate; + +import java.util.HashMap; +import java.util.Map; + +import static org.apache.parquet.Preconditions.checkArgument; + +/** + * Converts a {@code Class} to it's corresponding {@code Class}, eg + * {@code Class} to {@code Class} + */ +public class PrimitiveToBoxedClass { + private static final Map, Class> primitiveToBoxed = new HashMap, Class>(); + + static { + primitiveToBoxed.put(boolean.class, Boolean.class); + primitiveToBoxed.put(byte.class, Byte.class); + primitiveToBoxed.put(short.class, Short.class); + primitiveToBoxed.put(char.class, Character.class); + primitiveToBoxed.put(int.class, Integer.class); + primitiveToBoxed.put(long.class, Long.class); + primitiveToBoxed.put(float.class, Float.class); + primitiveToBoxed.put(double.class, Double.class); + } + + public static Class get(Class c) { + checkArgument(c.isPrimitive(), "Class " + c + " is not primitive!"); + return primitiveToBoxed.get(c); + } + + private PrimitiveToBoxedClass() { } +} diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java index d473841d95..d4e121118f 100644 --- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/SchemaCompatibilityValidator.java @@ -22,7 +22,6 @@ import java.util.Map; import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.filter2.predicate.Operators.And; import org.apache.parquet.filter2.predicate.Operators.Column; import org.apache.parquet.filter2.predicate.Operators.ColumnFilterPredicate; @@ -36,8 +35,8 @@ import org.apache.parquet.filter2.predicate.Operators.NotEq; import org.apache.parquet.filter2.predicate.Operators.Or; import org.apache.parquet.filter2.predicate.Operators.UserDefined; +import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import static org.apache.parquet.Preconditions.checkArgument; import static org.apache.parquet.Preconditions.checkNotNull; @@ -73,19 +72,11 @@ public static void validate(FilterPredicate predicate, MessageType schema) { // we are validating that what the user provided agrees with these. private final Map columnsAccordingToSchema = new HashMap(); - // the original type of a column, keyed by path - private final Map originalTypes = new HashMap(); - private SchemaCompatibilityValidator(MessageType schema) { for (ColumnDescriptor cd : schema.getColumns()) { ColumnPath columnPath = ColumnPath.get(cd.getPath()); columnsAccordingToSchema.put(columnPath, cd); - - OriginalType ot = schema.getType(cd.getPath()).getOriginalType(); - if (ot != null) { - originalTypes.put(columnPath, ot); - } } } @@ -182,7 +173,7 @@ private > void validateColumn(Column column) { + "Column " + path.toDotString() + " is repeated."); } - ValidTypeMap.assertTypeValid(column, descriptor.getType(), originalTypes.get(path)); + ValidTypeMap.assertTypeValid(column, descriptor.getType()); } private ColumnDescriptor getColumnDescriptor(ColumnPath columnPath) { diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java index 4f8b10d747..574604a1fa 100644 --- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java +++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java @@ -23,9 +23,8 @@ import java.util.Map; import java.util.Set; -import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.filter2.predicate.Operators.Column; -import org.apache.parquet.io.api.Binary; +import org.apache.parquet.hadoop.metadata.ColumnPath; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; @@ -43,40 +42,36 @@ public class ValidTypeMap { private ValidTypeMap() { } // classToParquetType and parquetTypeToClass are used as a bi-directional map - private static final Map, Set> classToParquetType = new HashMap, Set>(); - private static final Map>> parquetTypeToClass = new HashMap>>(); + private static final Map, Set> classToParquetType = new HashMap, Set>(); + private static final Map>> parquetTypeToClass = new HashMap>>(); // set up the mapping in both directions - private static void add(Class c, FullTypeDescriptor f) { - Set descriptors = classToParquetType.get(c); + private static void add(Class c, PrimitiveTypeName p) { + Set descriptors = classToParquetType.get(c); if (descriptors == null) { - descriptors = new HashSet(); + descriptors = new HashSet(); classToParquetType.put(c, descriptors); } - descriptors.add(f); + descriptors.add(p); - Set> classes = parquetTypeToClass.get(f); + Set> classes = parquetTypeToClass.get(p); if (classes == null) { classes = new HashSet>(); - parquetTypeToClass.put(f, classes); + parquetTypeToClass.put(p, classes); } classes.add(c); } static { - // basic primitive columns - add(Integer.class, new FullTypeDescriptor(PrimitiveTypeName.INT32, null)); - add(Long.class, new FullTypeDescriptor(PrimitiveTypeName.INT64, null)); - add(Float.class, new FullTypeDescriptor(PrimitiveTypeName.FLOAT, null)); - add(Double.class, new FullTypeDescriptor(PrimitiveTypeName.DOUBLE, null)); - add(Boolean.class, new FullTypeDescriptor(PrimitiveTypeName.BOOLEAN, null)); + for (PrimitiveTypeName t: PrimitiveTypeName.values()) { + Class c = t.javaType; - // Both of these binary types are valid - add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.BINARY, null)); - add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, null)); + if (c.isPrimitive()) { + c = PrimitiveToBoxedClass.get(c); + } - add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.BINARY, OriginalType.UTF8)); - add(Binary.class, new FullTypeDescriptor(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, OriginalType.UTF8)); + add(c, t); + } } /** @@ -87,14 +82,12 @@ private static void add(Class c, FullTypeDescriptor f) { * * @param foundColumn the column as declared by the user * @param primitiveType the primitive type according to the schema - * @param originalType the original type according to the schema */ - public static > void assertTypeValid(Column foundColumn, PrimitiveTypeName primitiveType, OriginalType originalType) { + public static > void assertTypeValid(Column foundColumn, PrimitiveTypeName primitiveType) { Class foundColumnType = foundColumn.getColumnType(); ColumnPath columnPath = foundColumn.getColumnPath(); - Set validTypeDescriptors = classToParquetType.get(foundColumnType); - FullTypeDescriptor typeInFileMetaData = new FullTypeDescriptor(primitiveType, originalType); + Set validTypeDescriptors = classToParquetType.get(foundColumnType); if (validTypeDescriptors == null) { StringBuilder message = new StringBuilder(); @@ -105,18 +98,18 @@ public static > void assertTypeValid(Column foundColu .append(foundColumnType.getName()) .append(" which is not supported in FilterPredicates."); - Set> supportedTypes = parquetTypeToClass.get(typeInFileMetaData); + Set> supportedTypes = parquetTypeToClass.get(primitiveType); if (supportedTypes != null) { message .append(" Supported types for this column are: ") .append(supportedTypes); } else { - message.append(" There are no supported types for columns of " + typeInFileMetaData); + message.append(" There are no supported types for columns of " + primitiveType); } throw new IllegalArgumentException(message.toString()); } - if (!validTypeDescriptors.contains(typeInFileMetaData)) { + if (!validTypeDescriptors.contains(primitiveType)) { StringBuilder message = new StringBuilder(); message .append("FilterPredicate column: ") @@ -126,53 +119,10 @@ public static > void assertTypeValid(Column foundColu .append(") does not match the schema found in file metadata. Column ") .append(columnPath.toDotString()) .append(" is of type: ") - .append(typeInFileMetaData) + .append(primitiveType) .append("\nValid types for this column are: ") - .append(parquetTypeToClass.get(typeInFileMetaData)); + .append(parquetTypeToClass.get(primitiveType)); throw new IllegalArgumentException(message.toString()); } } - - private static final class FullTypeDescriptor { - private final PrimitiveTypeName primitiveType; - private final OriginalType originalType; - - private FullTypeDescriptor(PrimitiveTypeName primitiveType, OriginalType originalType) { - this.primitiveType = primitiveType; - this.originalType = originalType; - } - - public PrimitiveTypeName getPrimitiveType() { - return primitiveType; - } - - public OriginalType getOriginalType() { - return originalType; - } - - @Override - public String toString() { - return "FullTypeDescriptor(" + "PrimitiveType: " + primitiveType + ", OriginalType: " + originalType + ')'; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - FullTypeDescriptor that = (FullTypeDescriptor) o; - - if (originalType != that.originalType) return false; - if (primitiveType != that.primitiveType) return false; - - return true; - } - - @Override - public int hashCode() { - int result = primitiveType != null ? primitiveType.hashCode() : 0; - result = 31 * result + (originalType != null ? originalType.hashCode() : 0); - return result; - } - } } diff --git a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java index 455eae4dea..e40294da2b 100644 --- a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java +++ b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestSchemaCompatibilityValidator.java @@ -112,7 +112,7 @@ public void testFindsInvalidTypes() { fail("this should throw"); } catch (IllegalArgumentException e) { assertEquals("FilterPredicate column: x.bar's declared type (java.lang.Long) does not match the schema found in file metadata. " - + "Column x.bar is of type: FullTypeDescriptor(PrimitiveType: INT32, OriginalType: null)\n" + + "Column x.bar is of type: INT32\n" + "Valid types for this column are: [class java.lang.Integer]", e.getMessage()); } } diff --git a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java index 8da008f5b5..d44136998d 100644 --- a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java +++ b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java @@ -61,26 +61,25 @@ public int compareTo(InvalidColumnType o) { @Test public void testValidTypes() { - assertTypeValid(intColumn, PrimitiveTypeName.INT32, null); - assertTypeValid(longColumn, PrimitiveTypeName.INT64, null); - assertTypeValid(floatColumn, PrimitiveTypeName.FLOAT, null); - assertTypeValid(doubleColumn, PrimitiveTypeName.DOUBLE, null); - assertTypeValid(booleanColumn, PrimitiveTypeName.BOOLEAN, null); - assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY, null); - assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, null); - assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY, OriginalType.UTF8); - assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY, OriginalType.UTF8); + assertTypeValid(intColumn, PrimitiveTypeName.INT32); + assertTypeValid(longColumn, PrimitiveTypeName.INT64); + assertTypeValid(floatColumn, PrimitiveTypeName.FLOAT); + assertTypeValid(doubleColumn, PrimitiveTypeName.DOUBLE); + assertTypeValid(booleanColumn, PrimitiveTypeName.BOOLEAN); + assertTypeValid(binaryColumn, PrimitiveTypeName.BINARY); + assertTypeValid(binaryColumn, PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY); + assertTypeValid(binaryColumn, PrimitiveTypeName.INT96); } @Test public void testMismatchedTypes() { try { - assertTypeValid(intColumn, PrimitiveTypeName.DOUBLE, null); + assertTypeValid(intColumn, PrimitiveTypeName.DOUBLE); fail("This should throw!"); } catch (IllegalArgumentException e) { assertEquals("FilterPredicate column: int.column's declared type (java.lang.Integer) does not match the " + "schema found in file metadata. Column int.column is of type: " - + "FullTypeDescriptor(PrimitiveType: DOUBLE, OriginalType: null)\n" + + "DOUBLE\n" + "Valid types for this column are: [class java.lang.Double]", e.getMessage()); } } @@ -88,24 +87,13 @@ public void testMismatchedTypes() { @Test public void testUnsupportedType() { try { - assertTypeValid(invalidColumn, PrimitiveTypeName.INT32, null); + assertTypeValid(invalidColumn, PrimitiveTypeName.INT32); fail("This should throw!"); } catch (IllegalArgumentException e) { assertEquals("Column invalid.column was declared as type: " + "org.apache.parquet.filter2.predicate.TestValidTypeMap$InvalidColumnType which is not supported " + "in FilterPredicates. Supported types for this column are: [class java.lang.Integer]", e.getMessage()); } - - try { - assertTypeValid(invalidColumn, PrimitiveTypeName.INT32, OriginalType.UTF8); - fail("This should throw!"); - } catch (IllegalArgumentException e) { - assertEquals("Column invalid.column was declared as type: " - + "org.apache.parquet.filter2.predicate.TestValidTypeMap$InvalidColumnType which is not supported " - + "in FilterPredicates. There are no supported types for columns of FullTypeDescriptor(PrimitiveType: INT32, OriginalType: UTF8)", - e.getMessage()); - } - } }