Skip to content

Commit e8b0b32

Browse files
committed
update based on comments
1 parent 6540213 commit e8b0b32

File tree

5 files changed

+31
-60
lines changed

5 files changed

+31
-60
lines changed

core/src/test/java/org/apache/iceberg/TestMetrics.java

Lines changed: 21 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,19 @@ public abstract class TestMetrics {
102102
optional(2, "doubleCol", DoubleType.get())
103103
);
104104

105+
private static final Record FLOAT_DOUBLE_RECORD_1 = createRecordWithFloatAndDouble(1.2F, 3.4D);
106+
private static final Record FLOAT_DOUBLE_RECORD_2 = createRecordWithFloatAndDouble(5.6F, 7.8D);
107+
private static final Record NAN_ONLY_RECORD = createRecordWithFloatAndDouble(Float.NaN, Double.NaN);
108+
105109
private final byte[] fixed = "abcd".getBytes(StandardCharsets.UTF_8);
106110

111+
private static Record createRecordWithFloatAndDouble(float floatValue, double doubleValue) {
112+
Record record = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
113+
record.setField("floatCol", floatValue);
114+
record.setField("doubleCol", doubleValue);
115+
return record;
116+
}
117+
107118
public abstract FileFormat fileFormat();
108119

109120
public abstract Metrics getMetrics(Schema schema, MetricsConfig metricsConfig, Record... records) throws IOException;
@@ -119,7 +130,7 @@ public boolean supportsSmallRowGroups() {
119130
return false;
120131
}
121132

122-
protected abstract OutputFile createFileToWriteTo() throws IOException;
133+
protected abstract OutputFile createOutputFile() throws IOException;
123134

124135
@Test
125136
public void testMetricsForRepeatedValues() throws IOException {
@@ -335,14 +346,7 @@ public void testMetricsForNullColumns() throws IOException {
335346

336347
@Test
337348
public void testMetricsForNaNColumns() throws IOException {
338-
Record firstRecord = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
339-
firstRecord.setField("floatCol", Float.NaN);
340-
firstRecord.setField("doubleCol", Double.NaN);
341-
Record secondRecord = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
342-
secondRecord.setField("floatCol", Float.NaN);
343-
secondRecord.setField("doubleCol", Double.NaN);
344-
345-
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, firstRecord, secondRecord);
349+
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, NAN_ONLY_RECORD, NAN_ONLY_RECORD);
346350
Assert.assertEquals(2L, (long) metrics.recordCount());
347351
assertCounts(1, 2L, 0L, 2L, metrics);
348352
assertCounts(2, 2L, 0L, 2L, metrics);
@@ -353,19 +357,8 @@ public void testMetricsForNaNColumns() throws IOException {
353357

354358
@Test
355359
public void testColumnBoundsWithNaNValueAtFront() throws IOException {
356-
Record nonNaNRecord1 = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
357-
nonNaNRecord1.setField("floatCol", 1.2F);
358-
nonNaNRecord1.setField("doubleCol", 3.4D);
359-
360-
Record nonNaNRecord2 = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
361-
nonNaNRecord2.setField("floatCol", 5.6F);
362-
nonNaNRecord2.setField("doubleCol", 7.8D);
363-
364-
Record nanRecord = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
365-
nanRecord.setField("floatCol", Float.NaN);
366-
nanRecord.setField("doubleCol", Double.NaN);
367-
368-
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, nanRecord, nonNaNRecord1, nonNaNRecord2);
360+
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA,
361+
NAN_ONLY_RECORD, FLOAT_DOUBLE_RECORD_1, FLOAT_DOUBLE_RECORD_2);
369362
Assert.assertEquals(3L, (long) metrics.recordCount());
370363
assertCounts(1, 3L, 0L, 1L, metrics);
371364
assertCounts(2, 3L, 0L, 1L, metrics);
@@ -383,19 +376,8 @@ public void testColumnBoundsWithNaNValueAtFront() throws IOException {
383376

384377
@Test
385378
public void testColumnBoundsWithNaNValueInMiddle() throws IOException {
386-
Record nonNaNRecord1 = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
387-
nonNaNRecord1.setField("floatCol", 1.2F);
388-
nonNaNRecord1.setField("doubleCol", 3.4D);
389-
390-
Record nonNaNRecord2 = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
391-
nonNaNRecord2.setField("floatCol", 5.6F);
392-
nonNaNRecord2.setField("doubleCol", 7.8D);
393-
394-
Record nanRecord = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
395-
nanRecord.setField("floatCol", Float.NaN);
396-
nanRecord.setField("doubleCol", Double.NaN);
397-
398-
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, nonNaNRecord1, nanRecord, nonNaNRecord2);
379+
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA,
380+
FLOAT_DOUBLE_RECORD_1, NAN_ONLY_RECORD, FLOAT_DOUBLE_RECORD_2);
399381
Assert.assertEquals(3L, (long) metrics.recordCount());
400382
assertCounts(1, 3L, 0L, 1L, metrics);
401383
assertCounts(2, 3L, 0L, 1L, metrics);
@@ -413,19 +395,8 @@ public void testColumnBoundsWithNaNValueInMiddle() throws IOException {
413395

414396
@Test
415397
public void testColumnBoundsWithNaNValueAtEnd() throws IOException {
416-
Record nonNaNRecord1 = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
417-
nonNaNRecord1.setField("floatCol", 1.2F);
418-
nonNaNRecord1.setField("doubleCol", 3.4D);
419-
420-
Record nonNaNRecord2 = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
421-
nonNaNRecord2.setField("floatCol", 5.6F);
422-
nonNaNRecord2.setField("doubleCol", 7.8D);
423-
424-
Record nanRecord = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
425-
nanRecord.setField("floatCol", Float.NaN);
426-
nanRecord.setField("doubleCol", Double.NaN);
427-
428-
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, nonNaNRecord1, nonNaNRecord2, nanRecord);
398+
Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA,
399+
FLOAT_DOUBLE_RECORD_1, FLOAT_DOUBLE_RECORD_2, NAN_ONLY_RECORD);
429400
Assert.assertEquals(3L, (long) metrics.recordCount());
430401
assertCounts(1, 3L, 0L, 1L, metrics);
431402
assertCounts(2, 3L, 0L, 1L, metrics);
@@ -467,7 +438,7 @@ public void testMetricsForTopLevelWithMultipleRowGroup() throws Exception {
467438
}
468439

469440
// create file with multiple row groups. by using smaller number of bytes
470-
OutputFile outputFile = createFileToWriteTo();
441+
OutputFile outputFile = createOutputFile();
471442
Metrics metrics = getMetricsForRecordsWithSmallRowGroups(SIMPLE_SCHEMA, outputFile, records.toArray(new Record[0]));
472443
InputFile recordsFile = outputFile.toInputFile();
473444

@@ -511,7 +482,7 @@ public void testMetricsForNestedStructFieldsWithMultipleRowGroup() throws IOExce
511482
}
512483

513484
// create file with multiple row groups. by using smaller number of bytes
514-
OutputFile outputFile = createFileToWriteTo();
485+
OutputFile outputFile = createOutputFile();
515486
Metrics metrics = getMetricsForRecordsWithSmallRowGroups(NESTED_SCHEMA, outputFile, records.toArray(new Record[0]));
516487
InputFile recordsFile = outputFile.toInputFile();
517488

data/src/test/java/org/apache/iceberg/TestMergingMetrics.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -141,20 +141,20 @@ private Long getExpectedNaNCount(List<Record> expectedRecords, Types.NestedField
141141
return 0;
142142
}
143143
if (FLOAT_FIELD.equals(field)) {
144-
return value.equals(Float.NaN) ? 1 : 0;
144+
return Float.isNaN((Float) value) ? 1 : 0;
145145
} else if (DOUBLE_FIELD.equals(field)) {
146-
return value.equals(Double.NaN) ? 1 : 0;
146+
return Double.isNaN((Double) value) ? 1 : 0;
147147
} else if (FLOAT_LIST.equals(field)) {
148148
return ((List<Float>) value).stream()
149-
.filter(val -> val != null && val.equals(Float.NaN))
149+
.filter(val -> val != null && Float.isNaN(val))
150150
.count();
151151
} else if (MAP_FIELD_1.equals(field)) {
152152
return ((Map<Float, ?>) value).keySet().stream()
153-
.filter(key -> key.equals(Float.NaN))
153+
.filter(key -> Float.isNaN(key))
154154
.count();
155155
} else if (MAP_FIELD_2.equals(field)) {
156156
return ((Map<?, Double>) value).values().stream()
157-
.filter(val -> val != null && val.equals(Double.NaN))
157+
.filter(val -> val != null && Double.isNaN(val))
158158
.count();
159159
} else {
160160
throw new RuntimeException("unknown field name for getting expected NaN count: " + field.name());

data/src/test/java/org/apache/iceberg/orc/TestOrcMetrics.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ public class TestOrcMetrics extends TestMetrics {
5151
Type.TypeID.FIXED, Type.TypeID.UUID);
5252

5353
@Override
54-
protected OutputFile createFileToWriteTo() throws IOException {
54+
protected OutputFile createOutputFile() throws IOException {
5555
File tmpFolder = temp.newFolder("orc");
5656
String filename = UUID.randomUUID().toString();
5757
return Files.localOutput(new File(tmpFolder, FileFormat.ORC.addExtension(filename)));
@@ -72,7 +72,7 @@ public Metrics getMetrics(Schema schema, Record... records) throws IOException {
7272

7373
@Override
7474
public Metrics getMetrics(Schema schema, MetricsConfig metricsConfig, Record... records) throws IOException {
75-
return getMetrics(schema, createFileToWriteTo(), ImmutableMap.of(), metricsConfig, records);
75+
return getMetrics(schema, createOutputFile(), ImmutableMap.of(), metricsConfig, records);
7676
}
7777

7878
@Override

data/src/test/java/org/apache/iceberg/parquet/TestParquetMetrics.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public FileFormat fileFormat() {
5757
}
5858

5959
@Override
60-
protected OutputFile createFileToWriteTo() throws IOException {
60+
protected OutputFile createOutputFile() throws IOException {
6161
File tmpFolder = temp.newFolder("parquet");
6262
String filename = UUID.randomUUID().toString();
6363
return Files.localOutput(new File(tmpFolder, FileFormat.PARQUET.addExtension(filename)));
@@ -70,7 +70,7 @@ public Metrics getMetrics(Schema schema, Record... records) throws IOException {
7070

7171
@Override
7272
public Metrics getMetrics(Schema schema, MetricsConfig metricsConfig, Record... records) throws IOException {
73-
return getMetrics(schema, createFileToWriteTo(), ImmutableMap.of(), metricsConfig, records);
73+
return getMetrics(schema, createOutputFile(), ImmutableMap.of(), metricsConfig, records);
7474
}
7575

7676
private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties,

parquet/src/main/java/org/apache/iceberg/parquet/ParquetValueWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public interface ParquetValueWriter<T> {
3333

3434
/**
3535
* Returns a stream of {@link FieldMetrics} that this ParquetValueWriter keeps track of.
36-
*
36+
* <p>
3737
* Since Parquet keeps track of most metrics in its footer, for now ParquetValueWriter only keeps track of NaN
3838
* counter, and only return non-empty stream if the writer writes double or float values either by itself or
3939
* transitively.

0 commit comments

Comments
 (0)