Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 5 additions & 42 deletions parquet-tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,50 +61,13 @@ java jar ./parquet-tools-<VERSION>.jar <command> my_parquet_file.lzo.parquet

## Commands Usage

To run it on hadoop, you should use "hadoop jar" instead of "java jar"
To see usage instructions for all commands:

```sh
usage: java -jar ./parquet-tools-<VERSION>.jar cat [option...] <input>
where option is one of:
--debug Disable color output even if supported
-h,--help Show this help string
--no-color Disable color output even if supported
where <input> is the parquet file to print to stdout

usage: java -jar ./parquet-tools-<VERSION>.jar head [option...] <input>
where option is one of:
--debug Disable color output even if supported
-h,--help Show this help string
-n,--records <arg> The number of records to show (default: 5)
--no-color Disable color output even if supported
where <input> is the parquet file to print to stdout

usage: java -jar ./parquet-tools-<VERSION>.jar schema [option...] <input>
where option is one of:
-d,--detailed <arg> Show detailed information about the schema.
--debug Disable color output even if supported
-h,--help Show this help string
--no-color Disable color output even if supported
where <input> is the parquet file containing the schema to show

usage: java -jar ./parquet-tools-<VERSION>.jar meta [option...] <input>
where option is one of:
--debug Disable color output even if supported
-h,--help Show this help string
--no-color Disable color output even if supported
where <input> is the parquet file to print to stdout

usage: java -jar ./parquet-tools-<VERSION>.jar dump [option...] <input>
where option is one of:
-c,--column <arg> Dump only the given column, can be specified more than
once
-d,--disable-data Do not dump column data
--debug Disable color output even if supported
-h,--help Show this help string
-m,--disable-meta Do not dump row group and page metadata
--no-color Disable color output even if supported
where <input> is the parquet file to print to stdout
```
java -jar ./parquet-tools-<VERSION>.jar --help
```

**Note:** To run it on Hadoop, use `hadoop jar` instead of `java -jar`.

## Meta Legend

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,18 @@ public class DumpCommand extends ArgsOnlyCommand {
.withDescription("Do not dump column data")
.create('d');

Option nocrop = OptionBuilder.withLongOpt("disable-crop")
.withDescription("Do not crop the output based on console width")
.create('n');

Option cl = OptionBuilder.withLongOpt("column")
.withDescription("Dump only the given column, can be specified more than once")
.hasArgs()
.create('c');

OPTIONS.addOption(md);
OPTIONS.addOption(dt);
OPTIONS.addOption(nocrop);
OPTIONS.addOption(cl);
}

Expand Down Expand Up @@ -122,24 +127,17 @@ public void execute(CommandLine options) throws Exception {
ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER);
MessageType schema = metaData.getFileMetaData().getSchema();

PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
.withAutoColumn()
.withAutoCrop()
.withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
.withColumnPadding(1)
.withMaxBufferedLines(1000000)
.withFlushOnTab()
.build();

boolean showmd = !options.hasOption('m');
boolean showdt = !options.hasOption('d');
boolean cropoutput = !options.hasOption('n');

Set<String> showColumns = null;
if (options.hasOption('c')) {
String[] cols = options.getOptionValues('c');
showColumns = new HashSet<String>(Arrays.asList(cols));
}

PrettyPrintWriter out = prettyPrintWriter(cropoutput);
dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
}

Expand Down Expand Up @@ -346,6 +344,21 @@ public static BigInteger binaryToBigInteger(Binary value) {
return new BigInteger(data);
}

/**
 * Creates the stdout pretty-printer used to render dump output.
 *
 * The writer is always configured with auto-column, the
 * ELIMINATE_NEWLINES whitespace handler, a column padding of 1,
 * a maximum of 1000000 buffered lines, and flush-on-tab.
 *
 * @param cropOutput when {@code true}, auto-crop is additionally enabled
 *                   on the builder; when {@code false} it is left off
 * @return the writer produced by the configured builder
 */
private static PrettyPrintWriter prettyPrintWriter(boolean cropOutput) {
  final PrettyPrintWriter.Builder printerConfig = PrettyPrintWriter.stdoutPrettyPrinter()
      .withAutoColumn()
      .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
      .withColumnPadding(1)
      .withMaxBufferedLines(1000000)
      .withFlushOnTab();

  // Cropping is the only optional setting; without it the base configuration is final.
  if (!cropOutput) {
    return printerConfig.build();
  }

  printerConfig.withAutoCrop();
  return printerConfig.build();
}

private static final class DumpGroupConverter extends GroupConverter {
@Override public void start() { }
@Override public void end() { }
Expand Down