Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 3 additions & 37 deletions parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,12 @@
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
import org.apache.parquet.column.statistics.BinaryStatistics;
import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.column.statistics.Statistics;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import java.nio.charset.StandardCharsets;
import java.util.Set;

import static org.apache.parquet.column.Encoding.BIT_PACKED;
Expand Down Expand Up @@ -90,46 +87,15 @@ public static String minMaxAsString(Statistics stats, OriginalType annotation) {
if (!stats.hasNonNullValue()) {
return "";
}
// TODO: use original types when showing decimal, timestamp, etc.
if (stats instanceof BinaryStatistics) {
byte[] minBytes = stats.getMinBytes();
byte[] maxBytes = stats.getMaxBytes();
return String.format("%s / %s",
printable(minBytes, annotation == OriginalType.UTF8, 30),
printable(maxBytes, annotation == OriginalType.UTF8, 30));
} else {
return String.format("%s / %s", stats.minAsString(), stats.maxAsString());
}
return String.format("%s / %s", humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30));
}

public static String toString(Statistics stats, long count, OriginalType annotation) {
if (stats == null) {
return "no stats";
}
// TODO: use original types when showing decimal, timestamp, etc.
if (stats instanceof BooleanStatistics) {
return String.format("nulls: %d/%d", stats.getNumNulls(), count);
} else if (stats instanceof BinaryStatistics) {
byte[] minBytes = stats.getMinBytes();
byte[] maxBytes = stats.getMaxBytes();
return String.format("min: %s max: %s nulls: %d/%d",
printable(minBytes, annotation == OriginalType.UTF8, 30),
printable(maxBytes, annotation == OriginalType.UTF8, 30),
stats.getNumNulls(), count);
} else {
return String.format("min: %s max: %s nulls: %d/%d",
stats.minAsString(), stats.maxAsString(), stats.getNumNulls(), count);
}
}

private static String printable(byte[] bytes, boolean isUtf8, int len) {
if (bytes == null) {
return "null";
} else if (isUtf8) {
return humanReadable(new String(bytes, StandardCharsets.UTF_8), len);
} else {
return humanReadable(bytes, len);
}
return String.format("min: %s max: %s nulls: %d/%d",
humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30), stats.getNumNulls(), count);
}

public static String humanReadable(String str, int len) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,8 @@ public byte[] getMinBytes() {
}

@Override
String toString(Binary value) {
// TODO: have separate toString for different logical types?
return value == null ? "null" : value.toStringUsingUTF8();
String stringify(Binary value) {
return stringifier.stringify(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ public byte[] getMinBytes() {
return BytesUtils.booleanToBytes(min);
}

@Override
String stringify(Boolean value) {
  // Rendering is handed off to the stringifier field; this class adds no formatting of its own.
  final String rendered = stringifier.stringify(value);
  return rendered;
}

@Override
public boolean isSmallerThan(long size) {
return !hasNonNullValue() || (2 < size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ public byte[] getMinBytes() {
}

@Override
String toString(Double value) {
return String.format("%.5f", value);
String stringify(Double value) {
return stringifier.stringify(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ public byte[] getMinBytes() {
}

@Override
String toString(Float value) {
return String.format("%.5f", value);
String stringify(Float value) {
return stringifier.stringify(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,8 @@ public byte[] getMinBytes() {
}

@Override
String toString(Integer value) {
// TODO: implement unsigned int as required
return value.toString();
String stringify(Integer value) {
return stringifier.stringify(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,8 @@ public byte[] getMinBytes() {
}

@Override
String toString(Long value) {
// TODO: implement unsigned int as required
return value.toString();
String stringify(Long value) {
return stringifier.stringify(value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@
package org.apache.parquet.column.statistics;

import java.util.Arrays;
import java.util.Objects;

import org.apache.parquet.column.UnknownColumnTypeException;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.PrimitiveComparator;
import org.apache.parquet.schema.PrimitiveStringifier;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
Expand All @@ -40,10 +39,12 @@ public abstract class Statistics<T extends Comparable<T>> {
private final PrimitiveComparator<T> comparator;
private boolean hasNonNullValue;
private long num_nulls;
final PrimitiveStringifier stringifier;

/**
 * Initializes the statistics for the given primitive type.
 *
 * The comparator and the stringifier are both obtained from {@code type};
 * the object starts with no non-null value recorded and a null count of zero.
 *
 * @param type the primitive type supplying the comparator and stringifier
 */
Statistics(PrimitiveType type) {
  this.type = type;
  // Both helpers come from the type itself.
  this.comparator = type.comparator();
  this.stringifier = type.stringifier();
  // Initial state: nothing observed yet.
  this.hasNonNullValue = false;
  this.num_nulls = 0;
}
Expand Down Expand Up @@ -287,19 +288,17 @@ public final int compareMaxToValue(T value) {
* Returns the string representation of min for debugging/logging purposes.
*/
public String minAsString() {
return toString(genericGetMin());
return stringify(genericGetMin());
}

/**
* Returns the string representation of max for debugging/logging purposes.
*/
public String maxAsString() {
return toString(genericGetMax());
return stringify(genericGetMax());
}

String toString(T value) {
return Objects.toString(value);
}
abstract String stringify(T value);

/**
* Abstract method to return whether the min and max values fit in the given
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,46 @@
public enum OriginalType {
MAP,
LIST,
UTF8,
UTF8(PrimitiveStringifier.UTF8_STRINGIFIER),
MAP_KEY_VALUE,
ENUM,
DECIMAL,
DATE,
TIME_MILLIS,
TIME_MICROS,
TIMESTAMP_MILLIS,
TIMESTAMP_MICROS,
UINT_8,
UINT_16,
UINT_32,
UINT_64,
INT_8,
INT_16,
INT_32,
INT_64,
JSON,
BSON,
INTERVAL;
ENUM(PrimitiveStringifier.UTF8_STRINGIFIER),
// DECIMAL has no fixed stringifier constant: it overrides stringifier(PrimitiveType)
// and builds one per type from that type's decimal metadata.
DECIMAL {
/**
 * Creates a decimal stringifier using the scale read from the decimal
 * metadata of {@code type}.
 */
@Override
PrimitiveStringifier stringifier(PrimitiveType type) {
return PrimitiveStringifier.createDecimalStringifier(type.getDecimalMetadata().getScale());
}
},
DATE(PrimitiveStringifier.DATE_STRINGIFIER),
TIME_MILLIS(PrimitiveStringifier.TIME_STRINGIFIER),
TIME_MICROS(PrimitiveStringifier.TIME_STRINGIFIER),
TIMESTAMP_MILLIS(PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER),
TIMESTAMP_MICROS(PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER),
UINT_8(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
UINT_16(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
UINT_32(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
UINT_64(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
INT_8(PrimitiveStringifier.DEFAULT_STRINGIFIER),
INT_16(PrimitiveStringifier.DEFAULT_STRINGIFIER),
INT_32(PrimitiveStringifier.DEFAULT_STRINGIFIER),
INT_64(PrimitiveStringifier.DEFAULT_STRINGIFIER),
JSON(PrimitiveStringifier.UTF8_STRINGIFIER),
BSON(PrimitiveStringifier.DEFAULT_STRINGIFIER),
INTERVAL(PrimitiveStringifier.INTERVAL_STRINGIFIER);

private final PrimitiveStringifier stringifier;

/**
 * Returns the stringifier associated with this original type.
 *
 * This base implementation does not read {@code type}; a constant with its
 * own body may override the method and use it.
 *
 * @param type the primitive type carrying this original type
 * @return the stringifier this constant was constructed with
 * @throws UnsupportedOperationException if this constant was constructed without a stringifier
 */
PrimitiveStringifier stringifier(PrimitiveType type) {
  if (stringifier != null) {
    return stringifier;
  }
  // Constants created through the no-arg constructor end up here.
  throw new UnsupportedOperationException("Stringifier is not supported for the original type: " + this);
}

/**
 * Creates a constant without a stringifier by delegating to the
 * one-argument constructor with {@code null}.
 */
OriginalType() {
this(null);
}

/**
 * Creates a constant that keeps the given stringifier in its
 * {@code stringifier} field.
 *
 * @param stringifier the stringifier to store; the no-arg constructor passes {@code null}
 */
OriginalType(PrimitiveStringifier stringifier) {
this.stringifier = stringifier;
}
}
Loading