diff --git a/parquet-tools/README.md b/parquet-tools/README.md index d60e1b4dd5..49506f387b 100644 --- a/parquet-tools/README.md +++ b/parquet-tools/README.md @@ -61,50 +61,13 @@ java jar ./parquet-tools-.jar my_parquet_file.lzo.parquet ## Commands Usage -To run it on hadoop, you should use "hadoop jar" instead of "java jar" +To see usage instructions for all commands: -```sh -usage: java -jar ./parquet-tools-.jar cat [option...] -where option is one of: - --debug Disable color output even if supported - -h,--help Show this help string - --no-color Disable color output even if supported -where is the parquet file to print to stdout - -usage: java -jar ./parquet-tools-.jar head [option...] -where option is one of: - --debug Disable color output even if supported - -h,--help Show this help string - -n,--records The number of records to show (default: 5) - --no-color Disable color output even if supported -where is the parquet file to print to stdout - -usage: java -jar ./parquet-tools-.jar schema [option...] -where option is one of: - -d,--detailed Show detailed information about the schema. - --debug Disable color output even if supported - -h,--help Show this help string - --no-color Disable color output even if supported -where is the parquet file containing the schema to show - -usage: java -jar ./parquet-tools-.jar meta [option...] -where option is one of: - --debug Disable color output even if supported - -h,--help Show this help string - --no-color Disable color output even if supported -where is the parquet file to print to stdout - -usage: java -jar dump [option...] 
-where option is one of: - -c,--column Dump only the given column, can be specified more than - once - -d,--disable-data Do not dump column data - --debug Disable color output even if supported - -h,--help Show this help string - -m,--disable-meta Do not dump row group and page metadata - --no-color Disable color output even if supported -where is the parquet file to print to stdout ``` +java -jar ./parquet-tools-.jar --help +``` + +**Note:** To run it on hadoop, you should use `hadoop jar` instead of `java -jar` ## Meta Legend diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java index 6d5e1063a0..c4ed407171 100644 --- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java +++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java @@ -85,6 +85,10 @@ public class DumpCommand extends ArgsOnlyCommand { .withDescription("Do not dump column data") .create('d'); + Option nocrop = OptionBuilder.withLongOpt("disable-crop") + .withDescription("Do not crop the output based on console width") + .create('n'); + Option cl = OptionBuilder.withLongOpt("column") .withDescription("Dump only the given column, can be specified more than once") .hasArgs() @@ -92,6 +96,7 @@ public class DumpCommand extends ArgsOnlyCommand { OPTIONS.addOption(md); OPTIONS.addOption(dt); + OPTIONS.addOption(nocrop); OPTIONS.addOption(cl); } @@ -122,17 +127,9 @@ public void execute(CommandLine options) throws Exception { ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath, NO_FILTER); MessageType schema = metaData.getFileMetaData().getSchema(); - PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter() - .withAutoColumn() - .withAutoCrop() - .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES) - .withColumnPadding(1) - .withMaxBufferedLines(1000000) - .withFlushOnTab() - .build(); - boolean showmd = 
!options.hasOption('m'); boolean showdt = !options.hasOption('d'); + boolean cropoutput = !options.hasOption('n'); Set showColumns = null; if (options.hasOption('c')) { @@ -140,6 +137,7 @@ public void execute(CommandLine options) throws Exception { showColumns = new HashSet(Arrays.asList(cols)); } + PrettyPrintWriter out = prettyPrintWriter(cropoutput); dump(out, metaData, schema, inpath, showmd, showdt, showColumns); } @@ -346,6 +344,21 @@ public static BigInteger binaryToBigInteger(Binary value) { return new BigInteger(data); } + private static PrettyPrintWriter prettyPrintWriter(boolean cropOutput) { + PrettyPrintWriter.Builder builder = PrettyPrintWriter.stdoutPrettyPrinter() + .withAutoColumn() + .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES) + .withColumnPadding(1) + .withMaxBufferedLines(1000000) + .withFlushOnTab(); + + if (cropOutput) { + builder.withAutoCrop(); + } + + return builder.build(); + } + private static final class DumpGroupConverter extends GroupConverter { @Override public void start() { } @Override public void end() { }