Skip to content

Commit ea402be

Browse files
djhworld authored and julienledem committed
PARQUET-668 - Provide option to disable auto crop feature in dump
https://issues.apache.org/jira/browse/PARQUET-668

1. Added option `--disable-crop`
2. Updated `README.md` to reflect changes

Author: djhworld <djharperuk@gmail.com>

Closes #358 from djhworld/master and squashes the following commits:

493c3d0 [djhworld] PARQUET-668: Removed usage instructions from README, replaced with --help flag
696a5e6 [djhworld] PARQUET-668 -> Updated README.md to fix issue in usage string
6cbf59b [djhworld] PARQUET-668 - Provide option to disable auto crop feature in DumpCommand output
1 parent 5c85b8d commit ea402be

File tree

2 files changed

+27
-51
lines changed

2 files changed

+27
-51
lines changed

parquet-tools/README.md

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -61,50 +61,13 @@ java jar ./parquet-tools-<VERSION>.jar <command> my_parquet_file.lzo.parquet
6161

6262
## Commands Usage
6363

64-
To run it on hadoop, you should use "hadoop jar" instead of "java jar"
64+
To see usage instructions for all commands:
6565

66-
```sh
67-
usage: java -jar ./parquet-tools-<VERSION>.jar cat [option...] <input>
68-
where option is one of:
69-
--debug Disable color output even if supported
70-
-h,--help Show this help string
71-
--no-color Disable color output even if supported
72-
where <input> is the parquet file to print to stdout
73-
74-
usage: java -jar ./parquet-tools-<VERSION>.jar head [option...] <input>
75-
where option is one of:
76-
--debug Disable color output even if supported
77-
-h,--help Show this help string
78-
-n,--records <arg> The number of records to show (default: 5)
79-
--no-color Disable color output even if supported
80-
where <input> is the parquet file to print to stdout
81-
82-
usage: java -jar ./parquet-tools-<VERSION>.jar schema [option...] <input>
83-
where option is one of:
84-
-d,--detailed <arg> Show detailed information about the schema.
85-
--debug Disable color output even if supported
86-
-h,--help Show this help string
87-
--no-color Disable color output even if supported
88-
where <input> is the parquet file containing the schema to show
89-
90-
usage: java -jar ./parquet-tools-<VERSION>.jar meta [option...] <input>
91-
where option is one of:
92-
--debug Disable color output even if supported
93-
-h,--help Show this help string
94-
--no-color Disable color output even if supported
95-
where <input> is the parquet file to print to stdout
96-
97-
usage: java -jar dump [option...] <input>
98-
where option is one of:
99-
-c,--column <arg> Dump only the given column, can be specified more than
100-
once
101-
-d,--disable-data Do not dump column data
102-
--debug Disable color output even if supported
103-
-h,--help Show this help string
104-
-m,--disable-meta Do not dump row group and page metadata
105-
--no-color Disable color output even if supported
106-
where <input> is the parquet file to print to stdout
10766
```
67+
java jar ./parquet-tools-<VERSION>.jar --help
68+
```
69+
70+
**Note:** To run it on hadoop, you should use `hadoop jar` instead of `java jar`
10871

10972
## Meta Legend
11073

parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,18 @@ public class DumpCommand extends ArgsOnlyCommand {
8585
.withDescription("Do not dump column data")
8686
.create('d');
8787

88+
Option nocrop = OptionBuilder.withLongOpt("disable-crop")
89+
.withDescription("Do not crop the output based on console width")
90+
.create('n');
91+
8892
Option cl = OptionBuilder.withLongOpt("column")
8993
.withDescription("Dump only the given column, can be specified more than once")
9094
.hasArgs()
9195
.create('c');
9296

9397
OPTIONS.addOption(md);
9498
OPTIONS.addOption(dt);
99+
OPTIONS.addOption(nocrop);
95100
OPTIONS.addOption(cl);
96101
}
97102

@@ -122,24 +127,17 @@ public void execute(CommandLine options) throws Exception {
122127
ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER);
123128
MessageType schema = metaData.getFileMetaData().getSchema();
124129

125-
PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
126-
.withAutoColumn()
127-
.withAutoCrop()
128-
.withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
129-
.withColumnPadding(1)
130-
.withMaxBufferedLines(1000000)
131-
.withFlushOnTab()
132-
.build();
133-
134130
boolean showmd = !options.hasOption('m');
135131
boolean showdt = !options.hasOption('d');
132+
boolean cropoutput = !options.hasOption('n');
136133

137134
Set<String> showColumns = null;
138135
if (options.hasOption('c')) {
139136
String[] cols = options.getOptionValues('c');
140137
showColumns = new HashSet<String>(Arrays.asList(cols));
141138
}
142139

140+
PrettyPrintWriter out = prettyPrintWriter(cropoutput);
143141
dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
144142
}
145143

@@ -346,6 +344,21 @@ public static BigInteger binaryToBigInteger(Binary value) {
346344
return new BigInteger(data);
347345
}
348346

347+
private static PrettyPrintWriter prettyPrintWriter(boolean cropOutput) {
348+
PrettyPrintWriter.Builder builder = PrettyPrintWriter.stdoutPrettyPrinter()
349+
.withAutoColumn()
350+
.withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
351+
.withColumnPadding(1)
352+
.withMaxBufferedLines(1000000)
353+
.withFlushOnTab();
354+
355+
if (cropOutput) {
356+
builder.withAutoCrop();
357+
}
358+
359+
return builder.build();
360+
}
361+
349362
private static final class DumpGroupConverter extends GroupConverter {
350363
@Override public void start() { }
351364
@Override public void end() { }

0 commit comments

Comments
 (0)