Skip to content

Commit 0637e2f

Browse files
nezihyigitbasi authored and rdblue committed
PARQUET-360: Handle all map key types with cat tool's json dump
When dumping a Parquet map with `parquet-cat --json`, the tool throws a ClassCastException because it doesn't properly handle all map key types:

```
java.lang.ClassCastException: [B cannot be cast to java.lang.String
    at org.apache.parquet.tools.read.SimpleMapRecord.toJsonObject(SimpleMapRecord.java:34)
    at org.apache.parquet.tools.read.SimpleRecord.toJsonValue(SimpleRecord.java:119)
    at org.apache.parquet.tools.read.SimpleRecord.toJsonObject(SimpleRecord.java:112)
    at org.apache.parquet.tools.read.SimpleRecord.prettyPrintJson(SimpleRecord.java:106)
    at org.apache.parquet.tools.command.CatCommand.execute(CatCommand.java:76)
    at org.apache.parquet.tools.Main.main(Main.java:222)
[B cannot be cast to java.lang.String
```

Author: Nezih Yigitbasi <nyigitbasi@netflix.com>

Closes #259 from nezihyigitbasi/parquet-cat-json and squashes the following commits:

d047502 [Nezih Yigitbasi] Add unit test
e4cd545 [Nezih Yigitbasi] Get rid of deprecated methods
bdc8fdf [Nezih Yigitbasi] Handle all map key types with cat tool's json dump
1 parent 66e39fc commit 0637e2f

File tree

6 files changed

+112
-8
lines changed

6 files changed

+112
-8
lines changed

parquet-tools/src/main/java/org/apache/parquet/tools/command/CatCommand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public void execute(CommandLine options) throws Exception {
7070
ParquetReader<SimpleRecord> reader = null;
7171
try {
7272
PrintWriter writer = new PrintWriter(Main.out, true);
73-
reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
73+
reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
7474
for (SimpleRecord value = reader.read(); value != null; value = reader.read()) {
7575
if (options.hasOption('j')) {
7676
value.prettyPrintJson(writer);

parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.parquet.column.page.DictionaryPage;
4848
import org.apache.parquet.column.page.PageReadStore;
4949
import org.apache.parquet.column.page.PageReader;
50+
import org.apache.parquet.format.converter.ParquetMetadataConverter;
5051
import org.apache.parquet.hadoop.ParquetFileReader;
5152
import org.apache.parquet.hadoop.metadata.BlockMetaData;
5253
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -62,6 +63,8 @@
6263

6364
import com.google.common.base.Joiner;
6465

66+
import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
67+
6568
public class DumpCommand extends ArgsOnlyCommand {
6669
private static final Charset UTF8 = Charset.forName("UTF-8");
6770
private static final CharsetDecoder UTF8_DECODER = UTF8.newDecoder();
@@ -115,7 +118,7 @@ public void execute(CommandLine options) throws Exception {
115118
Configuration conf = new Configuration();
116119
Path inpath = new Path(input);
117120

118-
ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
121+
ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER);
119122
MessageType schema = metaData.getFileMetaData().getSchema();
120123

121124
PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()

parquet-tools/src/main/java/org/apache/parquet/tools/command/HeadCommand.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public void execute(CommandLine options) throws Exception {
7878
ParquetReader<SimpleRecord> reader = null;
7979
try {
8080
PrintWriter writer = new PrintWriter(Main.out, true);
81-
reader = new ParquetReader<SimpleRecord>(new Path(input), new SimpleReadSupport());
81+
reader = ParquetReader.builder(new SimpleReadSupport(), new Path(input)).build();
8282
for (SimpleRecord value = reader.read(); value != null && num-- > 0; value = reader.read()) {
8383
value.prettyPrint(writer);
8484
writer.println();

parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
import org.apache.parquet.tools.util.MetadataUtils;
3838
import org.apache.parquet.tools.util.PrettyPrintWriter;
3939

40+
import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
41+
4042
public class ShowSchemaCommand extends ArgsOnlyCommand {
4143
public static final String[] USAGE = new String[] {
4244
"<input>",
@@ -88,7 +90,7 @@ public void execute(CommandLine options) throws Exception {
8890
} else {
8991
file = path;
9092
}
91-
metaData = ParquetFileReader.readFooter(conf, file);
93+
metaData = ParquetFileReader.readFooter(conf, file, NO_FILTER);
9294
MessageType schema = metaData.getFileMetaData().getSchema();
9395

9496
Main.out.println(schema);

parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleMapRecord.java

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
package org.apache.parquet.tools.read;
2020

2121
import com.google.common.collect.Maps;
22+
import org.codehaus.jackson.node.BinaryNode;
2223

24+
import java.util.Arrays;
2325
import java.util.Map;
2426

2527
public class SimpleMapRecord extends SimpleRecord {
@@ -30,14 +32,55 @@ protected Object toJsonObject() {
3032
String key = null;
3133
Object val = null;
3234
for (NameValue kv : ((SimpleRecord) value.getValue()).values) {
33-
if (kv.getName().equals("key")) {
34-
key = (String) kv.getValue();
35-
} else if (kv.getName().equals("value")) {
36-
val = toJsonValue(kv.getValue());
35+
String kvName = kv.getName();
36+
Object kvValue = kv.getValue();
37+
if (kvName.equals("key")) {
38+
key = keyToString(kvValue);
39+
} else if (kvName.equals("value")) {
40+
val = toJsonValue(kvValue);
3741
}
3842
}
3943
result.put(key, val);
4044
}
4145
return result;
4246
}
47+
48+
String keyToString(Object kvValue) {
49+
if (kvValue == null) {
50+
return "null";
51+
}
52+
53+
Class<?> type = kvValue.getClass();
54+
if (type.isArray()) {
55+
if (type.getComponentType() == boolean.class) {
56+
return Arrays.toString((boolean[]) kvValue);
57+
}
58+
else if (type.getComponentType() == byte.class) {
59+
return new BinaryNode((byte[]) kvValue).asText();
60+
}
61+
else if (type.getComponentType() == char.class) {
62+
return Arrays.toString((char[]) kvValue);
63+
}
64+
else if (type.getComponentType() == double.class) {
65+
return Arrays.toString((double[]) kvValue);
66+
}
67+
else if (type.getComponentType() == float.class) {
68+
return Arrays.toString((float[]) kvValue);
69+
}
70+
else if (type.getComponentType() == int.class) {
71+
return Arrays.toString((int[]) kvValue);
72+
}
73+
else if (type.getComponentType() == long.class) {
74+
return Arrays.toString((long[]) kvValue);
75+
}
76+
else if (type.getComponentType() == short.class) {
77+
return Arrays.toString((short[]) kvValue);
78+
}
79+
else {
80+
return Arrays.toString((Object[]) kvValue);
81+
}
82+
} else {
83+
return String.valueOf(kvValue);
84+
}
85+
}
4386
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.parquet.tools.read;
20+
21+
22+
import org.junit.Assert;
23+
import org.junit.Test;
24+
25+
public class TestSimpleMapRecord {
26+
27+
class TestRecord {
28+
private int x;
29+
private int y;
30+
31+
public TestRecord(int x, int y) {
32+
this.x = x;
33+
this.y = y;
34+
}
35+
36+
@Override
37+
public String toString() {
38+
return "TestRecord {" + x + "," + y + "}";
39+
}
40+
}
41+
42+
@Test
43+
public void testBinary() {
44+
SimpleMapRecord r = new SimpleMapRecord();
45+
Assert.assertEquals("null", r.keyToString(null));
46+
Assert.assertEquals("[true, false, true]", r.keyToString(new boolean[]{true, false, true}));
47+
Assert.assertEquals("[a, z]", r.keyToString(new char[] { 'a', 'z' }));
48+
Assert.assertEquals("[1.0, 3.0]", r.keyToString(new double[]{1.0, 3.0 }));
49+
Assert.assertEquals("[2.0, 4.0]", r.keyToString(new float[]{2.0f, 4.0f }));
50+
Assert.assertEquals("[100, 999]", r.keyToString(new int[]{100, 999 }));
51+
Assert.assertEquals("[23, 37]", r.keyToString(new long[] { 23l, 37l }));
52+
Assert.assertEquals("[-1, -2]", r.keyToString(new short[]{(short) -1, (short) -2}));
53+
Assert.assertEquals("dGVzdA==", r.keyToString("test".getBytes()));
54+
Assert.assertEquals("TestRecord {222,333}", r.keyToString(new TestRecord(222, 333)));
55+
}
56+
}

0 commit comments

Comments
 (0)