Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unused fields from DataFile. #914

Merged
merged 1 commit into from
Apr 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions api/src/main/java/org/apache/iceberg/DataFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ static StructType getType(StructType partitionType) {
required(103, "record_count", LongType.get()),
required(104, "file_size_in_bytes", LongType.get()),
required(105, "block_size_in_bytes", LongType.get()),
optional(106, "file_ordinal", IntegerType.get()),
optional(107, "sort_columns", ListType.ofRequired(112, IntegerType.get())),
optional(108, "column_sizes", MapType.ofRequired(117, 118,
IntegerType.get(), LongType.get())),
optional(109, "value_counts", MapType.ofRequired(119, 120,
Expand Down Expand Up @@ -89,16 +87,6 @@ static StructType getType(StructType partitionType) {
*/
long fileSizeInBytes();

/**
* @return file ordinal if written in a global ordering, or null
*/
Integer fileOrdinal();

/**
* @return list of columns the file records are sorted by, or null
*/
List<Integer> sortColumns();

/**
* @return if collected, map from column ID to the size of the column in bytes, null otherwise
*/
Expand Down
10 changes: 0 additions & 10 deletions api/src/test/java/org/apache/iceberg/TestHelpers.java
Original file line number Diff line number Diff line change
Expand Up @@ -318,16 +318,6 @@ public long fileSizeInBytes() {
return 0;
}

@Override
public Integer fileOrdinal() {
return null;
}

@Override
public List<Integer> sortColumns() {
return null;
}

@Override
public Map<Integer, Long> columnSizes() {
return null;
Expand Down
48 changes: 12 additions & 36 deletions core/src/main/java/org/apache/iceberg/GenericDataFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ public PartitionData copy() {
private long fileSizeInBytes = -1L;

// optional fields
private Integer fileOrdinal = null; // boxed for nullability
private List<Integer> sortColumns = null;
private Map<Integer, Long> columnSizes = null;
private Map<Integer, Long> valueCounts = null;
private Map<Integer, Long> nullValueCounts = null;
Expand Down Expand Up @@ -173,8 +171,6 @@ private GenericDataFile(GenericDataFile toCopy, boolean fullCopy) {
this.partitionType = toCopy.partitionType;
this.recordCount = toCopy.recordCount;
this.fileSizeInBytes = toCopy.fileSizeInBytes;
this.fileOrdinal = toCopy.fileOrdinal;
this.sortColumns = copy(toCopy.sortColumns);
if (fullCopy) {
// TODO: support lazy conversion to/from map
this.columnSizes = copy(toCopy.columnSizes);
Expand Down Expand Up @@ -225,16 +221,6 @@ public long fileSizeInBytes() {
return fileSizeInBytes;
}

@Override
public Integer fileOrdinal() {
return fileOrdinal;
}

@Override
public List<Integer> sortColumns() {
return sortColumns;
}

@Override
public Map<Integer, Long> columnSizes() {
return columnSizes;
Expand Down Expand Up @@ -306,30 +292,24 @@ public void put(int i, Object v) {
case 5:
return;
case 6:
this.fileOrdinal = (Integer) v;
return;
case 7:
this.sortColumns = (List<Integer>) v;
return;
case 8:
this.columnSizes = (Map<Integer, Long>) v;
return;
case 9:
case 7:
this.valueCounts = (Map<Integer, Long>) v;
return;
case 10:
case 8:
this.nullValueCounts = (Map<Integer, Long>) v;
return;
case 11:
case 9:
this.lowerBounds = SerializableByteBufferMap.wrap((Map<Integer, ByteBuffer>) v);
return;
case 12:
case 10:
this.upperBounds = SerializableByteBufferMap.wrap((Map<Integer, ByteBuffer>) v);
return;
case 13:
case 11:
this.keyMetadata = ByteBuffers.toByteArray((ByteBuffer) v);
return;
case 14:
case 12:
this.splitOffsets = (List<Long>) v;
return;
default:
Expand Down Expand Up @@ -365,22 +345,18 @@ public Object get(int i) {
// to maintain compatibility, we need to return something.
return DEFAULT_BLOCK_SIZE;
case 6:
return fileOrdinal;
case 7:
return sortColumns;
case 8:
return columnSizes;
case 9:
case 7:
return valueCounts;
case 10:
case 8:
return nullValueCounts;
case 11:
case 9:
return lowerBounds;
case 12:
case 10:
return upperBounds;
case 13:
case 11:
return keyMetadata();
case 14:
case 12:
return splitOffsets;
default:
throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
Expand Down
26 changes: 6 additions & 20 deletions core/src/main/java/org/apache/iceberg/ManifestEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -276,22 +276,18 @@ public Object get(int pos) {
case 5:
return DEFAULT_BLOCK_SIZE;
case 6:
return wrapped.fileOrdinal();
case 7:
return wrapped.sortColumns();
case 8:
return wrapped.columnSizes();
case 9:
case 7:
return wrapped.valueCounts();
case 10:
case 8:
return wrapped.nullValueCounts();
case 11:
case 9:
return wrapped.lowerBounds();
case 12:
case 10:
return wrapped.upperBounds();
case 13:
case 11:
return wrapped.keyMetadata();
case 14:
case 12:
return wrapped.splitOffsets();
}
throw new IllegalArgumentException("Unknown field ordinal: " + pos);
Expand Down Expand Up @@ -332,16 +328,6 @@ public long fileSizeInBytes() {
return wrapped.fileSizeInBytes();
}

@Override
public Integer fileOrdinal() {
return wrapped.fileOrdinal();
}

@Override
public List<Integer> sortColumns() {
return wrapped.sortColumns();
}

@Override
public Map<Integer, Long> columnSizes() {
return wrapped.columnSizes();
Expand Down
4 changes: 2 additions & 2 deletions site/docs/spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,8 @@ The schema of a manifest file is a struct called `manifest_entry` with the follo
| **`103 record_count`** | `long` | Number of records in this file |
| **`104 file_size_in_bytes`** | `long` | Total file size in bytes |
| ~~**`105 block_size_in_bytes`**~~ | `long` | **Deprecated. Always write a default value and do not read.** |
| **`106 file_ordinal`** | `optional int` | Ordinal of the file w.r.t files with the same partition tuple and snapshot id |
| **`107 sort_columns`** | `optional list` | Columns the file is sorted by |
| ~~**`106 file_ordinal`**~~ | `optional int` | **Deprecated. Do not use.** |
| ~~**`107 sort_columns`**~~ | `optional list` | **Deprecated. Do not use.** |
| **`108 column_sizes`** | `optional map` | Map from column id to the total size on disk of all regions that store the column. Does not include bytes necessary to read other columns, like footers. Leave null for row-oriented formats (Avro). |
| **`109 value_counts`** | `optional map` | Map from column id to number of values in the column (including null values) |
| **`110 null_value_counts`** | `optional map` | Map from column id to number of null values in the column |
Expand Down