diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index 85c82bddbd..a1aa79a3ec 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -1,4 +1,4 @@
-/*
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements. See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
- *
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -50,6 +50,23 @@ private Binary() { }
 
   abstract public int length();
 
+  /**
+   * Returns the array holding the bytes of this value. The value occupies
+   * {@link #length()} bytes starting at index {@link #getOffset()}. The array
+   * may be the internal backing storage, so callers must treat it as read-only;
+   * it may also be a fresh copy, so callers should fetch it once and reuse it.
+   *
+   * @return the array containing the bytes of this value
+   */
+  abstract public byte[] getValue();
+
+  /**
+   * Returns the index in {@link #getValue()} at which this value starts.
+   *
+   * @return the index of the first byte of this value in the array
+   */
+  abstract public int getOffset();
+
   abstract public void writeTo(OutputStream out) throws IOException;
 
   abstract public void writeTo(DataOutput out) throws IOException;
@@ -145,6 +162,16 @@ public int length() {
       return length;
     }
 
+    @Override
+    public byte[] getValue() {
+      return value;
+    }
+
+    @Override
+    public int getOffset() {
+      return offset;
+    }
+
     @Override
     public void writeTo(OutputStream out) throws IOException {
       out.write(value, offset, length);
@@ -292,6 +319,16 @@ public int length() {
       return value.length;
     }
 
+    @Override
+    public byte[] getValue() {
+      return value;
+    }
+
+    @Override
+    public int getOffset() {
+      return 0;
+    }
+
     @Override
     public void writeTo(OutputStream out) throws IOException {
       out.write(value);
@@ -419,6 +456,27 @@ public int length() {
       return length;
     }
 
+    @Override
+    public byte[] getValue() {
+      if (value.hasArray()) {
+        return value.array();
+      }
+      // Direct and read-only buffers expose no array: copy the value out instead of throwing
+      byte[] copy = new byte[length];
+      ByteBuffer view = value.duplicate();
+      view.limit(offset + length);
+      view.position(offset);
+      view.get(copy);
+      return copy;
+    }
+
+    @Override
+    public int getOffset() {
+      // array() is the whole backing array, so the buffer's arrayOffset() has to be added;
+      // the copy returned for array-less buffers starts at index 0
+      return value.hasArray() ? value.arrayOffset() + offset : 0;
+    }
+
@Override public void writeTo(OutputStream out) throws IOException { if (value.hasArray()) { diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java index 5e9adbcf7b..417f7b3a85 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java @@ -181,13 +181,6 @@ public String toString() { }; private static abstract class BinaryComparator extends PrimitiveComparator { - @Override - int compareNotNulls(Binary o1, Binary o2) { - return compare(o1.toByteBuffer(), o2.toByteBuffer()); - } - - abstract int compare(ByteBuffer b1, ByteBuffer b2); - final int toUnsigned(byte b) { return b & 0xFF; } @@ -195,15 +188,15 @@ final int toUnsigned(byte b) { public static final PrimitiveComparator UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR = new BinaryComparator() { @Override - int compare(ByteBuffer b1, ByteBuffer b2) { - int l1 = b1.remaining(); - int l2 = b2.remaining(); - int p1 = b1.position(); - int p2 = b2.position(); + int compareNotNulls(Binary b1, Binary b2) { + int l1 = b1.length(); + int l2 = b2.length(); + int p1 = b1.getOffset(); + int p2 = b2.getOffset(); int minL = Math.min(l1, l2); for (int i = 0; i < minL; ++i) { - int result = unsignedCompare(b1.get(p1 + i), b2.get(p2 + i)); + int result = unsignedCompare(b1.getValue()[p1 + i], b2.getValue()[p2 + i]); if (result != 0) { return result; } @@ -232,14 +225,14 @@ public String toString() { private static final int POSITIVE_PADDING = 0; @Override - int compare(ByteBuffer b1, ByteBuffer b2) { - int l1 = b1.remaining(); - int l2 = b2.remaining(); - int p1 = b1.position(); - int p2 = b2.position(); - - boolean isNegative1 = l1 > 0 ? b1.get(p1) < 0 : false; - boolean isNegative2 = l2 > 0 ? 
b2.get(p2) < 0 : false; + int compareNotNulls(Binary b1, Binary b2) { + int l1 = b1.length(); + int l2 = b2.length(); + int p1 = b1.getOffset(); + int p2 = b2.getOffset(); + + boolean isNegative1 = l1 > 0 && b1.getValue()[p1] < 0; + boolean isNegative2 = l2 > 0 && b2.getValue()[p2] < 0; if (isNegative1 != isNegative2) { return isNegative1 ? -1 : 1; } @@ -249,24 +242,24 @@ int compare(ByteBuffer b1, ByteBuffer b2) { // Compare the beginning of the longer buffer with the proper padding if (l1 < l2) { int lengthDiff = l2 - l1; - result = -compareWithPadding(lengthDiff, b2, p2, isNegative1 ? NEGATIVE_PADDING : POSITIVE_PADDING); + result = -compareWithPadding(lengthDiff, b2.getValue(), p2, isNegative1 ? NEGATIVE_PADDING : POSITIVE_PADDING); p2 += lengthDiff; } else if (l1 > l2) { int lengthDiff = l1 - l2; - result = compareWithPadding(lengthDiff, b1, p1, isNegative2 ? NEGATIVE_PADDING : POSITIVE_PADDING); + result = compareWithPadding(lengthDiff, b1.getValue(), p1, isNegative2 ? NEGATIVE_PADDING : POSITIVE_PADDING); p1 += lengthDiff; } // The beginning of the longer buffer equals to the padding or the lengths are equal if (result == 0) { - result = compare(l1, b1, p1, b2, p2); + result = compare(l1, b1.getValue(), p1, b2.getValue(), p2); } return result; } - private int compareWithPadding(int length, ByteBuffer b, int p, int paddingByte) { + private int compareWithPadding(int length, byte[] b, int p, int paddingByte) { for (int i = p, n = p + length; i < n; ++i) { - int result = toUnsigned(b.get(i)) - paddingByte; + int result = toUnsigned(b[i]) - paddingByte; if (result != 0) { return result; } @@ -274,9 +267,9 @@ private int compareWithPadding(int length, ByteBuffer b, int p, int paddingByte) return 0; } - private int compare(int length, ByteBuffer b1, int p1, ByteBuffer b2, int p2) { + private int compare(int length, byte[] b1, int p1, byte[] b2, int p2) { for (int i = 0; i < length; ++i) { - int result = toUnsigned(b1.get(p1 + i)) - toUnsigned(b2.get(p2 + i)); + 
int result = toUnsigned(b1[p1 + i]) - toUnsigned(b2[p2 + i]); if (result != 0) { return result; } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java index 5a5d6d4f25..1f956ceb7c 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/statistics/TestStatistics.java @@ -189,7 +189,7 @@ public StatsValidator(DataPage page) { public void validate(T value) { if (hasNonNull) { assertTrue("min should be <= all values", comparator.compare(min, value) <= 0); - assertTrue("min should be >= all values", comparator.compare(max, value) >= 0); + assertTrue("max should be >= all values", comparator.compare(max, value) >= 0); } } }