Skip to content

Commit

Permalink
Renaming DataFilesSummary -> FilesSummary (#270)
Browse files Browse the repository at this point in the history
## Summary
Addresses feedback on #265,

which pointed out the misnaming of an internal variable. No functional
change to behavior.

## Changes

- [ ] Client-facing API Changes
- [ ] Internal API Changes
- [ ] Bug Fixes
- [ ] New Features
- [ ] Performance Improvements
- [X] Code Style
- [ ] Refactoring
- [ ] Documentation
- [ ] Tests


## Testing Done
<!--- Check any relevant boxes with "x" -->

- [ ] Manually Tested on local docker setup. Please include the commands
run, and their output.
- [ ] Added new tests for the changes made.
- [ ] Updated existing tests to reflect the changes made.
- [X] No tests added or updated. Please explain why. If unsure, please
feel free to ask for help.
- [ ] Some other form of testing like staging or soak time in
production. Please explain.

this is a change that can be validated by compiler via ./gradlew build

# Additional Information

- [ ] Breaking Changes
- [ ] Deprecations
- [ ] Large PR broken into smaller PRs, and PR plan linked in the
description.
  • Loading branch information
cbb330 authored Dec 16, 2024
1 parent 6d69b6c commit ad3cdcf
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
@Data
@AllArgsConstructor
@NoArgsConstructor
public class DataFilesSummary {
public class FilesSummary {

private Integer content;
private Long sumOfFileSizeBytes;
Expand All @@ -27,10 +27,10 @@ public class DataFilesSummary {
* @return Encoder for FilesSummary
*/
private static class EncoderSingleton {
public static final Encoder<DataFilesSummary> instance = Encoders.bean(DataFilesSummary.class);
public static final Encoder<FilesSummary> instance = Encoders.bean(FilesSummary.class);
}

public static Encoder<DataFilesSummary> getEncoder() {
public static Encoder<FilesSummary> getEncoder() {
return EncoderSingleton.instance;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,34 +50,34 @@ public static IcebergTableStats populateStatsOfAllReferencedFiles(
long totalMetadataFilesCount =
referencedManifestFilesCount + referencedManifestListFilesCount + metadataFilesCount;

Map<Integer, DataFilesSummary> allDataFilesSummary =
Map<Integer, FilesSummary> allFilesSummary =
getFileMetadataTable(table, spark, MetadataTableType.ALL_FILES);

long countOfDataFiles =
Optional.ofNullable(allDataFilesSummary.get(FileContent.DATA.id()))
.map(DataFilesSummary::getTotalFileCount)
Optional.ofNullable(allFilesSummary.get(FileContent.DATA.id()))
.map(FilesSummary::getTotalFileCount)
.orElse(0L);
long sumOfDataFileSizeBytes =
Optional.ofNullable(allDataFilesSummary.get(FileContent.DATA.id()))
.map(DataFilesSummary::getSumOfFileSizeBytes)
Optional.ofNullable(allFilesSummary.get(FileContent.DATA.id()))
.map(FilesSummary::getSumOfFileSizeBytes)
.orElse(0L);

long countOfPositionDeleteFiles =
Optional.ofNullable(allDataFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(DataFilesSummary::getTotalFileCount)
Optional.ofNullable(allFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(FilesSummary::getTotalFileCount)
.orElse(0L);
long sumOfPositionDeleteFileSizeBytes =
Optional.ofNullable(allDataFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(DataFilesSummary::getSumOfFileSizeBytes)
Optional.ofNullable(allFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(FilesSummary::getSumOfFileSizeBytes)
.orElse(0L);

long countOfEqualityDeleteFiles =
Optional.ofNullable(allDataFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(DataFilesSummary::getTotalFileCount)
Optional.ofNullable(allFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(FilesSummary::getTotalFileCount)
.orElse(0L);
long sumOfEqualityDeleteFilesSizeBytes =
Optional.ofNullable(allDataFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(DataFilesSummary::getSumOfFileSizeBytes)
Optional.ofNullable(allFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(FilesSummary::getSumOfFileSizeBytes)
.orElse(0L);

log.info(
Expand Down Expand Up @@ -115,34 +115,34 @@ public static IcebergTableStats populateStatsOfAllReferencedFiles(
public static IcebergTableStats populateStatsForSnapshots(
String fqtn, Table table, SparkSession spark, IcebergTableStats stats) {

Map<Integer, DataFilesSummary> currentSnapshotDataFilesSummary =
Map<Integer, FilesSummary> currentSnapshotFilesSummary =
getFileMetadataTable(table, spark, MetadataTableType.FILES);

long countOfDataFiles =
Optional.ofNullable(currentSnapshotDataFilesSummary.get(FileContent.DATA.id()))
.map(DataFilesSummary::getTotalFileCount)
Optional.ofNullable(currentSnapshotFilesSummary.get(FileContent.DATA.id()))
.map(FilesSummary::getTotalFileCount)
.orElse(0L);
long sumOfDataFileSizeBytes =
Optional.ofNullable(currentSnapshotDataFilesSummary.get(FileContent.DATA.id()))
.map(DataFilesSummary::getSumOfFileSizeBytes)
Optional.ofNullable(currentSnapshotFilesSummary.get(FileContent.DATA.id()))
.map(FilesSummary::getSumOfFileSizeBytes)
.orElse(0L);

long countOfPositionDeleteFiles =
Optional.ofNullable(currentSnapshotDataFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(DataFilesSummary::getTotalFileCount)
Optional.ofNullable(currentSnapshotFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(FilesSummary::getTotalFileCount)
.orElse(0L);
long sumOfPositionDeleteFileSizeBytes =
Optional.ofNullable(currentSnapshotDataFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(DataFilesSummary::getSumOfFileSizeBytes)
Optional.ofNullable(currentSnapshotFilesSummary.get(FileContent.POSITION_DELETES.id()))
.map(FilesSummary::getSumOfFileSizeBytes)
.orElse(0L);

long countOfEqualityDeleteFiles =
Optional.ofNullable(currentSnapshotDataFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(DataFilesSummary::getTotalFileCount)
Optional.ofNullable(currentSnapshotFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(FilesSummary::getTotalFileCount)
.orElse(0L);
long sumOfEqualityDeleteFilesSizeBytes =
Optional.ofNullable(currentSnapshotDataFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(DataFilesSummary::getSumOfFileSizeBytes)
Optional.ofNullable(currentSnapshotFilesSummary.get(FileContent.EQUALITY_DELETES.id()))
.map(FilesSummary::getSumOfFileSizeBytes)
.orElse(0L);

Long currentSnapshotId =
Expand Down Expand Up @@ -264,10 +264,10 @@ private static long getManifestFilesCount(
* Return summary of table files content either from all snapshots or current snapshot depending
* on metadataTableType.
*/
private static Map<Integer, DataFilesSummary> getFileMetadataTable(
private static Map<Integer, FilesSummary> getFileMetadataTable(
Table table, SparkSession spark, MetadataTableType metadataTableType) {
Encoder<DataFilesSummary> dataFilesSummaryEncoder = DataFilesSummary.getEncoder();
Map<Integer, DataFilesSummary> result = new HashMap<>();
Encoder<FilesSummary> dataFilesSummaryEncoder = FilesSummary.getEncoder();
Map<Integer, FilesSummary> result = new HashMap<>();
SparkTableUtil.loadMetadataTable(spark, table, metadataTableType)
.select("content", "file_path", "file_size_in_bytes")
.dropDuplicates()
Expand All @@ -280,7 +280,7 @@ private static Map<Integer, DataFilesSummary> getFileMetadataTable(
int content = row.getContent();
long totalSizeBytes = row.getSumOfFileSizeBytes();
long fileCount = row.getTotalFileCount();
result.put(content, new DataFilesSummary(content, totalSizeBytes, fileCount));
result.put(content, new FilesSummary(content, totalSizeBytes, fileCount));
});
return result;
}
Expand Down

0 comments on commit ad3cdcf

Please sign in to comment.