From 501fc43ea14ca98403e859517719ee18c046df05 Mon Sep 17 00:00:00 2001
From: Himanshu Gwalani
Date: Thu, 8 Jun 2023 21:04:50 +0530
Subject: [PATCH 1/3] Adding BulkDataGenerator tool

---
 .../BulkDataGeneratorInputFormat.java         |  64 +++++
 .../BulkDataGeneratorMapper.java              | 108 +++++++
 .../BulkDataGeneratorRecordReader.java        |  56 ++++
 .../BulkDataGeneratorTool.java                | 268 ++++++++++++++++++
 .../hbase/util/bulkdatagenerator/Utility.java |  72 +++++
 5 files changed, 568 insertions(+)
 create mode 100644 hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
 create mode 100644 hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
 create mode 100644 hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java
 create mode 100644 hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
 create mode 100644 hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java

diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
new file mode 100644
index 000000000000..eaea8c78e3f2
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
@@ -0,0 +1,64 @@
+package org.apache.hadoop.hbase.util.bulkdatagenerator;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class BulkDataGeneratorInputFormat extends InputFormat<Text, NullWritable> {
+
+  public static final String MAPPER_TASK_COUNT_KEY = BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
+
+  @Override
+  public List<InputSplit> getSplits(JobContext job) throws IOException {
+    // Get the number of mapper tasks configured
+    int mapperCount = job.getConfiguration().getInt(MAPPER_TASK_COUNT_KEY, -1);
+    Preconditions.checkArgument(mapperCount > 0, MAPPER_TASK_COUNT_KEY + " is not set.");
+
+    // Create a number of input splits equal to the number of mapper tasks
+    ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
+    for (int i = 0; i < mapperCount; ++i) {
+      splits.add(new FakeInputSplit());
+    }
+    return splits;
+  }
+
+  @Override
+  public RecordReader<Text, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader = new BulkDataGeneratorRecordReader();
+    bulkDataGeneratorRecordReader.initialize(split, context);
+    return bulkDataGeneratorRecordReader;
+  }
+
+  /**
+   * Dummy input split to be used by {@link BulkDataGeneratorRecordReader}
+   */
+  private static class FakeInputSplit extends InputSplit implements Writable {
+
+    @Override public void readFields(DataInput arg0) throws IOException {
+    }
+
+    @Override public void write(DataOutput arg0) throws IOException {
+    }
+
+    @Override public long getLength() throws IOException, InterruptedException {
+      return 0;
+    }
+
+    @Override public String[] getLocations() throws IOException, InterruptedException {
+      return new String[0];
+    }
+  }
+}
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
new file mode 100644
index 000000000000..288f9a5d2217
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
@@ -0,0 +1,108 @@
+package org.apache.hadoop.hbase.util.bulkdatagenerator;
+
+import org.apache.commons.math3.util.Pair;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
+import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+public class BulkDataGeneratorMapper extends
+  Mapper<Text, NullWritable, ImmutableBytesWritable, KeyValue> {
+
+  /** Counter enumeration to count number of rows generated. */
+  public static enum Counters {
+    ROWS_GENERATED
+  }
+
+  public static final String SPLIT_COUNT_KEY = BulkDataGeneratorMapper.class.getName() + "split.count";
+
+  private static final String ORG_ID = "00D000000000062";
+  private static final int MAX_EVENT_ID = Integer.MAX_VALUE;
+  private static final int MAX_VEHICLE_ID = 100;
+  private static final int MAX_SPEED_KPH = 140;
+  private static final int NUM_LOCATIONS = 10;
+  private static int splitCount = 1;
+  private static final Random random = new Random(System.currentTimeMillis());
+  private static final Map<String, Pair<BigDecimal, BigDecimal>> LOCATIONS = Maps.newHashMapWithExpectedSize(NUM_LOCATIONS);
+  private static final List<String> LOCATION_KEYS = Lists.newArrayListWithCapacity(NUM_LOCATIONS);
+  static {
+    LOCATIONS.put("Belém", new Pair<>(BigDecimal.valueOf(-01.45), BigDecimal.valueOf(-48.48)));
+    LOCATIONS.put("Brasília", new Pair<>(BigDecimal.valueOf(-15.78), BigDecimal.valueOf(-47.92)));
+    LOCATIONS.put("Campinas", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-47.05)));
+    LOCATIONS.put("Cuiaba", new Pair<>(BigDecimal.valueOf(-07.25), BigDecimal.valueOf(-58.42)));
+    LOCATIONS.put("Manaus", new Pair<>(BigDecimal.valueOf(-03.10), BigDecimal.valueOf(-60.00)));
+    LOCATIONS.put("Porto Velho", new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90)));
+    LOCATIONS.put("Recife", new Pair<>(BigDecimal.valueOf(-08.10), BigDecimal.valueOf(-34.88)));
+    LOCATIONS.put("Rio de Janeiro", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23)));
+    LOCATIONS.put("Santarém", new Pair<>(BigDecimal.valueOf(-02.43), BigDecimal.valueOf(-54.68)));
+    LOCATIONS.put("São Paulo", new Pair<>(BigDecimal.valueOf(-23.53), BigDecimal.valueOf(-46.62)));
+    LOCATION_KEYS.addAll(LOCATIONS.keySet());
+  }
+
+  final static byte[] COLUMN_FAMILY_BYTES = Utility.COLUMN_FAMILY.getBytes();
+
+  /** {@inheritDoc} */
+  @Override
+  protected void setup(Context context) throws IOException,
+    InterruptedException {
+    Configuration c = context.getConfiguration();
+    splitCount = c.getInt(SPLIT_COUNT_KEY, 1);
+  }
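+
+  // A sketch of the row key produced by map() below (values illustrative, assuming splitCount = 9,
+  // i.e. 10 region prefixes): record index 12 maps to prefix 12 % 10 = 2, zero-padded to
+  // "000002", so the emitted row key has the shape
+  // "000002_<currentTime><randomFraction>_12:00D000000000062".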
+ + /** + * Generates a single record based on value set to the key by {@link BulkDataGeneratorRecordReader#getCurrentKey()}. + * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is first part of row key. Keeping first {@link Utility#SPLIT_PREFIX_LENGTH} characters as index of the record to be generated ensures that records are equally distributed across all regions of the table since region boundaries are generated in similar fashion. Check {@link Utility#createTable(Admin, String, int, Map)} method for region split info. + * @param key - The key having index of next record to be generated + * @param value - Value associated with the key (not used) + * @param context - Context of the mapper container + * @throws IOException + * @throws InterruptedException + */ + @Override + protected void map(Text key, NullWritable value, Context context) + throws IOException, InterruptedException { + + int recordIndex = Integer.parseInt(key.toString()); + + // <6-characters-for-region-boundary-prefix>_<15-random-characters>_ + final String toolEventId = String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex%(splitCount+1)) + "_" + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_" + recordIndex; + final String eventId = String.valueOf(Math.abs(random.nextInt(MAX_EVENT_ID))); + final String vechileId = String.valueOf(Math.abs(random.nextInt(MAX_VEHICLE_ID))); + final String speed = String.valueOf(Math.abs(random.nextInt(MAX_SPEED_KPH))); + final String location = LOCATION_KEYS.get(random.nextInt(NUM_LOCATIONS)); + final Pair coordinates = LOCATIONS.get(location); + final BigDecimal latitude = coordinates.getFirst(); + final BigDecimal longitude = coordinates.getSecond(); + + final ImmutableBytesWritable hKey = new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes()); + addKeyValue(context, hKey, Utility.TableColumnNames.ORG_ID, ORG_ID); + addKeyValue(context, hKey, Utility.TableColumnNames.TOOL_EVENT_ID, toolEventId); + addKeyValue(context, hKey, Utility.TableColumnNames.EVENT_ID, eventId); + addKeyValue(context, hKey, Utility.TableColumnNames.VEHICLE_ID, vechileId); + addKeyValue(context, hKey, Utility.TableColumnNames.SPEED, speed); + addKeyValue(context, hKey, Utility.TableColumnNames.LATITUDE, latitude.toString()); + addKeyValue(context, hKey, Utility.TableColumnNames.LONGITUDE, longitude.toString()); + addKeyValue(context, hKey, Utility.TableColumnNames.LOCATION, location); + addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP, String.valueOf(EnvironmentEdgeManager.currentTime())); + + context.getCounter(Counters.ROWS_GENERATED).increment(1); + } + + private void addKeyValue(final Context context, ImmutableBytesWritable key, final Utility.TableColumnNames columnName, final String value) + throws IOException, InterruptedException { + KeyValue kv = new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes()); + context.write(key, kv); + } +} diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java new file mode 100644 index 000000000000..010e53a75120 --- /dev/null +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java @@ -0,0 +1,56 @@ +package org.apache.hadoop.hbase.util.bulkdatagenerator; + +import org.apache.hadoop.io.NullWritable; +import 
org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+
+import java.io.IOException;
+
+public class BulkDataGeneratorRecordReader extends RecordReader<Text, NullWritable> {
+
+  private int numRecordsToCreate = 0;
+  private int createdRecords = 0;
+  private Text key = new Text();
+  private NullWritable value = NullWritable.get();
+
+  public static final String RECORDS_PER_MAPPER_TASK_KEY = BulkDataGeneratorInputFormat.class.getName() + "records.per.mapper.task";
+
+  @Override
+  public void initialize(InputSplit split, TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    // Get the number of records to create from the configuration
+    this.numRecordsToCreate = context.getConfiguration().getInt(RECORDS_PER_MAPPER_TASK_KEY, -1);
+    Preconditions.checkArgument(numRecordsToCreate > 0, "Number of records to be created by per mapper should be greater than 0.");
+  }
+
+  @Override
+  public boolean nextKeyValue() {
+    createdRecords++;
+    return createdRecords <= numRecordsToCreate;
+  }
+
+  @Override
+  public Text getCurrentKey() {
+    // Set the index of record to be created
+    key.set(String.valueOf(createdRecords));
+    return key;
+  }
+
+  @Override
+  public NullWritable getCurrentValue() {
+    return value;
+  }
+
+  @Override
+  public float getProgress() throws IOException, InterruptedException {
+    return (float) createdRecords / (float) numRecordsToCreate;
+  }
+
+  @Override
+  public void close() throws IOException {
+
+  }
+}
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
new file mode 100644
index 000000000000..2a26f18ec20e
--- /dev/null
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java
@@ -0,0 +1,268 @@
+package org.apache.hadoop.hbase.util.bulkdatagenerator;
+
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
+import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A command line utility to generate pre-splitted HBase Tables with large amount (TBs) of random data, equally distributed among all regions. + */ +public class BulkDataGeneratorTool { + + private static final Logger logger = LoggerFactory.getLogger(BulkDataGeneratorTool.class); + + /** + * Prefix for the generated HFiles directory + */ + private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/" ; + + /** + * Number of mapper container to be launched for generating of HFiles + */ + private int mapperCount; + + /** + * Number of rows to be generated by each mapper + */ + private long rowsPerMapper; + + /** + * Table for which random data needs to be generated + */ + private String table; + + /** + * Number of splits for the {@link #table}. Number of regions for the table will be ({@link #splitCount} + 1). + */ + private int splitCount; + + /** + * Flag to delete the table (before creating) if it already exists + */ + private boolean deleteTableIfExist; + + /** + * Additional HBase meta-data options to be set for the table + */ + Map tableOptions = new HashMap<>(); + + public static void main(String[] args) throws Exception { + Configuration conf = HBaseConfiguration.create(); + BulkDataGeneratorTool bulkDataGeneratorTool = new BulkDataGeneratorTool(); + bulkDataGeneratorTool.run(conf, args); + } + + public boolean run(Configuration conf, String[] args) throws IOException { + // Read CLI arguments + CommandLine line = null; + try { + Parser parser = new GnuParser(); + line = parser.parse(getOptions(), args); + readCommandLineParameters(conf, line); + } catch (ParseException | IOException exception) { + logger.error("Error while parsing CLI arguments.", exception); + return false; + } + + if(line.hasOption("-h")) { + printUsage(); + return true; + } + + Path outputDirectory = generateOutputDirectory(); + logger.info("HFiles will be generated at " + outputDirectory.toString()); + + try(Connection connection = ConnectionFactory.createConnection(conf)) { + final Admin admin = connection.getAdmin(); + final TableName tableName = TableName.valueOf(table); + if(admin.tableExists(tableName)) { + if(deleteTableIfExist) { + logger.info("Deleting the table since it already exist and delete-if-exist flag is set to true"); + Utility.deleteTable(admin, table); + } else { + logger.info("Table already exists, cannot generate HFiles for existing table."); + return false; + } + } + + // Creating the pre-split table + Utility.createTable(admin, table, splitCount, tableOptions); + logger.info(table + " created successfully"); + + Job job = createSubmittableJob(conf); + + Table hbaseTable = connection.getTable(tableName); + + // Auto configure partitioner and reducer + HFileOutputFormat2.configureIncrementalLoad(job, hbaseTable, hbaseTable.getRegionLocator()); + + FileOutputFormat.setOutputPath(job, outputDirectory); + + boolean result = job.waitForCompletion(true); + + if(result) { + logger.info("HFiles generated successfully. 
Starting bulk load to " + table); + LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(conf); + int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table}); + return (loadIncrementalResult == 0); + } else { + logger.info("Failed to generate HFiles."); + return false; + } + } catch (Exception e) { + logger.error("Failed to generate data", e); + return false; + } finally { + FileSystem.get(conf).deleteOnExit(outputDirectory); + } + } + + protected Job createSubmittableJob(Configuration conf) + throws IOException { + + conf.setInt(BulkDataGeneratorMapper.SPLIT_COUNT_KEY, splitCount); + conf.setInt(BulkDataGeneratorInputFormat.MAPPER_TASK_COUNT_KEY, mapperCount); + conf.setLong(BulkDataGeneratorRecordReader.RECORDS_PER_MAPPER_TASK_KEY, rowsPerMapper); + + Job job = new Job(conf, BulkDataGeneratorTool.class.getSimpleName() + " - " + table); + + job.setJarByClass(BulkDataGeneratorMapper.class); + job.setInputFormatClass(BulkDataGeneratorInputFormat.class); + + HBaseConfiguration.addHbaseResources(conf); + + job.setMapperClass(BulkDataGeneratorMapper.class); + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(KeyValue.class); + + return job; + } + + /** + * Get the random output directory path where HFiles will be generated + * @return + */ + protected Path generateOutputDirectory() { + final String outputDirectory = OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis(); + return new Path(outputDirectory); + } + + /** + * This method parses the command line parameters into instance variables + * @throws ParseException + */ + protected void readCommandLineParameters(Configuration conf, CommandLine line) + throws ParseException, IOException { + final List genericParameters = new ArrayList(); + + //Parse the generic options + for (Map.Entry entry : line.getOptionProperties("D").entrySet()) { + genericParameters.add("-D"); + genericParameters.add(entry.getKey() + "=" + entry.getValue()); + } + + logger.info("Parsed generic parameters: " + Arrays.toString(genericParameters.toArray(new String[0]))); + + new GenericOptionsParser(conf, genericParameters.toArray(new String[0])); + + table = line.getOptionValue("table"); + Preconditions.checkArgument(!StringUtils.isEmpty(table), "Table name must not be empty"); + mapperCount = Integer.parseInt(line.getOptionValue("mapper-count")); + Preconditions.checkArgument(mapperCount > 0, "Mapper count must be greater than 0"); + splitCount = Integer.parseInt(line.getOptionValue("split-count")); + Preconditions.checkArgument((splitCount > 0) && (splitCount < Utility.MAX_SPLIT_COUNT), "Split count must be greater than 0 and less than " + Utility.MAX_SPLIT_COUNT); + rowsPerMapper = Long.parseLong(line.getOptionValue("rows-per-mapper")); + Preconditions.checkArgument(rowsPerMapper > 0, "Rows per mapper must be greater than 0"); + deleteTableIfExist = line.hasOption("delete-if-exist"); + parseTableOptions(line); + } + + private void parseTableOptions(final CommandLine line) { + final String tableOptionsAsString = line.getOptionValue("table-options"); + if(!StringUtils.isEmpty(tableOptionsAsString)) { + for(String tableOption : tableOptionsAsString.split(",")) { + final String[] keyValueSplit = tableOption.split("="); + final String key = keyValueSplit[0]; + final String value = keyValueSplit[1]; + tableOptions.put(key, value); + } + } + } + + /** + * @return the command line options required by the sor 
job. + */ + protected Options getOptions() { + final Options options = new Options(); + Option option = new Option("t", "table", true, + "The table name for which data need to be generated."); + options.addOption(option); + + option = new Option("d", "delete-if-exist", false, + "If it's set, the table will be deleted if already exist."); + options.addOption(option); + + option = new Option("mc", "mapper-count", true, + "The number of mapper containers to be launched."); + options.addOption(option); + + option = new Option("sc", "split-count", true, + "The number of regions/pre-splits to be created for the table."); + options.addOption(option); + + option = new Option("r", "rows-per-mapper", true, + "The number of rows to be generated PER mapper."); + options.addOption(option); + + option = new Option("o", "table-options", true, + "Table options to be set while creating the table."); + options.addOption(option); + + option = new Option("h", "help", false, + "Show help message for the tool"); + options.addOption(option); + + return options; + } + + protected void printUsage() { + final HelpFormatter helpFormatter = new HelpFormatter(); + helpFormatter.setWidth(120); + final String helpMessageCommand = "hbase " + BulkDataGeneratorTool.class.getName(); + final String commandSyntax = helpMessageCommand + " [-D]*"; + final String helpMessageSuffix = "Examples:\n" + + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10\n" + + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -d -o \"DISABLE_BACKUP=true,NORMALIZATION_ENABLED=false\"\n" + + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -Dmapreduce.map.memory.mb=8192 -Dmapreduce.map.java.opts=-Xmx7782m\n"; + helpFormatter.printHelp(commandSyntax, "", getOptions(), helpMessageSuffix); + } +} diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java new file mode 100644 index 000000000000..8fcc2936e610 --- /dev/null +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java @@ -0,0 +1,72 @@ +package org.apache.hadoop.hbase.util.bulkdatagenerator; + +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; + +import java.io.IOException; +import java.util.Map; + +public class Utility { + + /** + * Schema for HBase table to be generated by generated and populated by {@link BulkDataGeneratorTool} + */ + public enum TableColumnNames { + ORG_ID ("orgId".getBytes()), + TOOL_EVENT_ID ("toolEventId".getBytes()), + EVENT_ID ("eventId".getBytes()), + VEHICLE_ID ("vehicleId".getBytes()), + SPEED ("speed".getBytes()), + LATITUDE ("latitude".getBytes()), + LONGITUDE ("longitude".getBytes()), + LOCATION ("location".getBytes()), + TIMESTAMP ("timestamp".getBytes()); + + private final byte[] columnName; + + TableColumnNames (byte[] column) { + this.columnName = column; + } + + public byte[] getColumnName() { + return this.columnName; + } + } + + public static final String COLUMN_FAMILY = "cf"; + + public static final int SPLIT_PREFIX_LENGTH = 6; + + public static final int MAX_SPLIT_COUNT = (int) Math.pow(10, SPLIT_PREFIX_LENGTH); + + public static void deleteTable(Admin admin, 
String tableName) throws IOException {
+    admin.disableTable(TableName.valueOf(tableName));
+    admin.deleteTable(TableName.valueOf(tableName));
+  }
+
+  /**
+   * Creates a pre-splitted HBase Table having single column family ({@link #COLUMN_FAMILY}) and sequential splits with {@link #SPLIT_PREFIX_LENGTH} length character prefix.
+   * Example: If a table (TEST_TABLE_1) needs to be generated with splitCount as 10, the table would be created with (10+1) regions with boundaries end-keys as (000000-000001, 000001-000002, 000002-000003, ...., 000010-)
+   * @param admin - Admin object associated with HBase connection
+   * @param tableName - Name of table to be created
+   * @param splitCount - Number of splits for the table (Number of regions will be splitCount + 1)
+   * @param tableOptions - Additional HBase metadata properties to be set for the table
+   * @throws IOException
+   */
+  public static void createTable(Admin admin, String tableName, int splitCount, Map<String, String> tableOptions) throws IOException {
+    Preconditions.checkArgument(splitCount > 0, "Split count must be greater than 0");
+    TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
+    tableOptions.forEach(tableDescriptorBuilder::setValue);
+    TableDescriptor tableDescriptor = tableDescriptorBuilder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
+    // Pre-splitting table based on splitCount
+    byte[][] splitKeys = new byte[splitCount][];
+    for (int i = 0; i < splitCount; i++) {
+      splitKeys[i] = String.format("%0" + SPLIT_PREFIX_LENGTH + "d", i + 1).getBytes();
+    }
+    admin.createTable(tableDescriptor, splitKeys);
+  }
+}

From: Himanshu Gwalani
Date: Thu, 8 Jun 2023 23:40:28 +0530
Subject: [PATCH 2/3] Applying spotless

---
 .../BulkDataGeneratorInputFormat.java         |  51 +-
 .../BulkDataGeneratorMapper.java              |  86 +-
 .../BulkDataGeneratorRecordReader.java        |  29 +-
 .../BulkDataGeneratorTool.java                | 135 +-
 .../hbase/util/bulkdatagenerator/Utility.java |  73 +-
 5 files changed, 243 insertions(+), 131 deletions(-)
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
index eaea8c78e3f2..f40951e945df 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorInputFormat.java
@@ -1,5 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.util.bulkdatagenerator;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -8,17 +30,13 @@
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
 public class BulkDataGeneratorInputFormat extends InputFormat<Text, NullWritable> {
 
-  public static final String MAPPER_TASK_COUNT_KEY = BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
+  public static final String MAPPER_TASK_COUNT_KEY =
+    BulkDataGeneratorInputFormat.class.getName() + "mapper.task.count";
 
   @Override
   public List<InputSplit> getSplits(JobContext job) throws IOException {
@@ -35,9 +53,10 @@ public List<InputSplit> getSplits(JobContext job) throws IOException {
   }
 
   @Override
-  public RecordReader<Text, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
-    throws IOException, InterruptedException {
-    BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader = new BulkDataGeneratorRecordReader();
+  public RecordReader<Text, NullWritable> createRecordReader(InputSplit split,
+    TaskAttemptContext context) throws IOException, InterruptedException {
+    BulkDataGeneratorRecordReader bulkDataGeneratorRecordReader =
+      new BulkDataGeneratorRecordReader();
     bulkDataGeneratorRecordReader.initialize(split, context);
     return bulkDataGeneratorRecordReader;
   }
@@ -47,17 +66,21 @@ public RecordReader<Text, NullWritable> createRecordReader(InputSplit split, Tas
    */
   private static class FakeInputSplit extends InputSplit implements Writable {
 
-    @Override public void readFields(DataInput arg0) throws IOException {
+    @Override
+    public void readFields(DataInput arg0) throws IOException {
     }
 
-    @Override public void write(DataOutput arg0) throws IOException {
+    @Override
+    public void write(DataOutput arg0) throws IOException {
     }
 
-    @Override public long getLength() throws IOException, InterruptedException {
+    @Override
+    public long getLength() throws IOException, InterruptedException {
       return 0;
     }
 
-    @Override public String[] getLocations() throws IOException, InterruptedException {
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException {
       return new String[0];
     }
   }
 }
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
index 288f9a5d2217..c7317a0c7c99 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorMapper.java
@@ -1,5 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hbase.util.bulkdatagenerator; +import java.io.IOException; +import java.math.BigDecimal; +import java.util.List; +import java.util.Map; +import java.util.Random; import org.apache.commons.math3.util.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.KeyValue; @@ -9,24 +31,20 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; + import org.apache.hbase.thirdparty.com.google.common.collect.Lists; import org.apache.hbase.thirdparty.com.google.common.collect.Maps; -import java.io.IOException; -import java.math.BigDecimal; -import java.util.List; -import java.util.Map; -import java.util.Random; - -public class BulkDataGeneratorMapper extends - Mapper { +public class BulkDataGeneratorMapper + extends Mapper { /** Counter enumeration to count number of rows generated. */ public static enum Counters { ROWS_GENERATED } - public static final String SPLIT_COUNT_KEY = BulkDataGeneratorMapper.class.getName() + "split.count"; + public static final String SPLIT_COUNT_KEY = + BulkDataGeneratorMapper.class.getName() + "split.count"; private static final String ORG_ID = "00D000000000062"; private static final int MAX_EVENT_ID = Integer.MAX_VALUE; @@ -35,7 +53,8 @@ public static enum Counters { private static final int NUM_LOCATIONS = 10; private static int splitCount = 1; private static final Random random = new Random(System.currentTimeMillis()); - private static final Map> LOCATIONS = Maps.newHashMapWithExpectedSize(NUM_LOCATIONS); + private static final Map> LOCATIONS = + Maps.newHashMapWithExpectedSize(NUM_LOCATIONS); private static final List LOCATION_KEYS = Lists.newArrayListWithCapacity(NUM_LOCATIONS); static { LOCATIONS.put("Belém", new Pair<>(BigDecimal.valueOf(-01.45), BigDecimal.valueOf(-48.48))); @@ -43,9 +62,11 @@ public static enum Counters { LOCATIONS.put("Campinas", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-47.05))); LOCATIONS.put("Cuiaba", new Pair<>(BigDecimal.valueOf(-07.25), BigDecimal.valueOf(-58.42))); LOCATIONS.put("Manaus", new Pair<>(BigDecimal.valueOf(-03.10), BigDecimal.valueOf(-60.00))); - LOCATIONS.put("Porto Velho", new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90))); + LOCATIONS.put("Porto Velho", + new Pair<>(BigDecimal.valueOf(-08.75), BigDecimal.valueOf(-63.90))); LOCATIONS.put("Recife", new Pair<>(BigDecimal.valueOf(-08.10), BigDecimal.valueOf(-34.88))); - LOCATIONS.put("Rio de Janeiro", new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23))); + LOCATIONS.put("Rio de Janeiro", + new Pair<>(BigDecimal.valueOf(-22.90), BigDecimal.valueOf(-43.23))); LOCATIONS.put("Santarém", new Pair<>(BigDecimal.valueOf(-02.43), BigDecimal.valueOf(-54.68))); LOCATIONS.put("São Paulo", new Pair<>(BigDecimal.valueOf(-23.53), BigDecimal.valueOf(-46.62))); LOCATION_KEYS.addAll(LOCATIONS.keySet()); @@ -55,29 +76,34 @@ public static enum Counters { /** {@inheritDoc} */ @Override - protected void setup(Context context) throws IOException, - InterruptedException { + protected void setup(Context context) throws 
IOException, InterruptedException { Configuration c = context.getConfiguration(); splitCount = c.getInt(SPLIT_COUNT_KEY, 1); } /** - * Generates a single record based on value set to the key by {@link BulkDataGeneratorRecordReader#getCurrentKey()}. - * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is first part of row key. Keeping first {@link Utility#SPLIT_PREFIX_LENGTH} characters as index of the record to be generated ensures that records are equally distributed across all regions of the table since region boundaries are generated in similar fashion. Check {@link Utility#createTable(Admin, String, int, Map)} method for region split info. - * @param key - The key having index of next record to be generated - * @param value - Value associated with the key (not used) + * Generates a single record based on value set to the key by + * {@link BulkDataGeneratorRecordReader#getCurrentKey()}. + * {@link Utility.TableColumnNames#TOOL_EVENT_ID} is first part of row key. Keeping first + * {@link Utility#SPLIT_PREFIX_LENGTH} characters as index of the record to be generated ensures + * that records are equally distributed across all regions of the table since region boundaries + * are generated in similar fashion. Check {@link Utility#createTable(Admin, String, int, Map)} + * method for region split info. + * @param key - The key having index of next record to be generated + * @param value - Value associated with the key (not used) * @param context - Context of the mapper container - * @throws IOException - * @throws InterruptedException */ @Override protected void map(Text key, NullWritable value, Context context) - throws IOException, InterruptedException { + throws IOException, InterruptedException { - int recordIndex = Integer.parseInt(key.toString()); + int recordIndex = Integer.parseInt(key.toString()); // <6-characters-for-region-boundary-prefix>_<15-random-characters>_ - final String toolEventId = String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex%(splitCount+1)) + "_" + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_" + recordIndex; + final String toolEventId = + String.format("%0" + Utility.SPLIT_PREFIX_LENGTH + "d", recordIndex % (splitCount + 1)) + "_" + + EnvironmentEdgeManager.currentTime() + (1e14 + (random.nextFloat() * 9e13)) + "_" + + recordIndex; final String eventId = String.valueOf(Math.abs(random.nextInt(MAX_EVENT_ID))); final String vechileId = String.valueOf(Math.abs(random.nextInt(MAX_VEHICLE_ID))); final String speed = String.valueOf(Math.abs(random.nextInt(MAX_SPEED_KPH))); @@ -86,7 +112,8 @@ protected void map(Text key, NullWritable value, Context context) final BigDecimal latitude = coordinates.getFirst(); final BigDecimal longitude = coordinates.getSecond(); - final ImmutableBytesWritable hKey = new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes()); + final ImmutableBytesWritable hKey = + new ImmutableBytesWritable(String.format("%s:%s", toolEventId, ORG_ID).getBytes()); addKeyValue(context, hKey, Utility.TableColumnNames.ORG_ID, ORG_ID); addKeyValue(context, hKey, Utility.TableColumnNames.TOOL_EVENT_ID, toolEventId); addKeyValue(context, hKey, Utility.TableColumnNames.EVENT_ID, eventId); @@ -95,14 +122,17 @@ protected void map(Text key, NullWritable value, Context context) addKeyValue(context, hKey, Utility.TableColumnNames.LATITUDE, latitude.toString()); addKeyValue(context, hKey, Utility.TableColumnNames.LONGITUDE, longitude.toString()); addKeyValue(context, hKey, 
Utility.TableColumnNames.LOCATION, location); - addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP, String.valueOf(EnvironmentEdgeManager.currentTime())); + addKeyValue(context, hKey, Utility.TableColumnNames.TIMESTAMP, + String.valueOf(EnvironmentEdgeManager.currentTime())); context.getCounter(Counters.ROWS_GENERATED).increment(1); } - private void addKeyValue(final Context context, ImmutableBytesWritable key, final Utility.TableColumnNames columnName, final String value) - throws IOException, InterruptedException { - KeyValue kv = new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes()); + private void addKeyValue(final Context context, ImmutableBytesWritable key, + final Utility.TableColumnNames columnName, final String value) + throws IOException, InterruptedException { + KeyValue kv = + new KeyValue(key.get(), COLUMN_FAMILY_BYTES, columnName.getColumnName(), value.getBytes()); context.write(key, kv); } } diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java index 010e53a75120..f4ecc659e51b 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorRecordReader.java @@ -1,13 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.hbase.util.bulkdatagenerator; +import java.io.IOException; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; -import java.io.IOException; +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; public class BulkDataGeneratorRecordReader extends RecordReader { @@ -16,14 +33,16 @@ public class BulkDataGeneratorRecordReader extends RecordReader 0, "Number of records to be created by per mapper should be greater than 0."); + Preconditions.checkArgument(numRecordsToCreate > 0, + "Number of records to be created by per mapper should be greater than 0."); } @Override diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java index 2a26f18ec20e..322431cd90b3 100644 --- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java +++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/BulkDataGeneratorTool.java @@ -1,15 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.hbase.util.bulkdatagenerator; -import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; -import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; -import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser; -import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; -import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; -import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; -import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; -import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -25,16 +38,21 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; +import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; +import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser; +import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; +import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; +import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; +import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; +import org.apache.hbase.thirdparty.org.apache.commons.cli.Parser; /** - * A command line utility to generate pre-splitted HBase Tables with large amount (TBs) of random data, equally distributed among all regions. + * A command line utility to generate pre-splitted HBase Tables with large amount (TBs) of random + * data, equally distributed among all regions. */ public class BulkDataGeneratorTool { @@ -43,7 +61,7 @@ public class BulkDataGeneratorTool { /** * Prefix for the generated HFiles directory */ - private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/" ; + private static final String OUTPUT_DIRECTORY_PREFIX = "/bulk_data_generator/"; /** * Number of mapper container to be launched for generating of HFiles @@ -61,7 +79,8 @@ public class BulkDataGeneratorTool { private String table; /** - * Number of splits for the {@link #table}. Number of regions for the table will be ({@link #splitCount} + 1). + * Number of splits for the {@link #table}. Number of regions for the table will be + * ({@link #splitCount} + 1). 
@@ -93,7 +112,7 @@ public boolean run(Configuration conf, String[] args) throws IOException {
       return false;
     }
 
-    if(line.hasOption("-h")) {
+    if (line.hasOption("-h")) {
       printUsage();
       return true;
     }
@@ -101,12 +120,13 @@ public boolean run(Configuration conf, String[] args) throws IOException {
     Path outputDirectory = generateOutputDirectory();
     logger.info("HFiles will be generated at " + outputDirectory.toString());
 
-    try(Connection connection = ConnectionFactory.createConnection(conf)) {
+    try (Connection connection = ConnectionFactory.createConnection(conf)) {
       final Admin admin = connection.getAdmin();
       final TableName tableName = TableName.valueOf(table);
-      if(admin.tableExists(tableName)) {
-        if(deleteTableIfExist) {
-          logger.info("Deleting the table since it already exist and delete-if-exist flag is set to true");
+      if (admin.tableExists(tableName)) {
+        if (deleteTableIfExist) {
+          logger.info(
+            "Deleting the table since it already exists and the delete-if-exist flag is set to true");
           Utility.deleteTable(admin, table);
         } else {
           logger.info("Table already exists, cannot generate HFiles for existing table.");
@@ -129,10 +149,11 @@ public boolean run(Configuration conf, String[] args) throws IOException {
 
       boolean result = job.waitForCompletion(true);
 
-      if(result) {
+      if (result) {
         logger.info("HFiles generated successfully. Starting bulk load to " + table);
         LoadIncrementalHFiles loadIncrementalHFiles = new LoadIncrementalHFiles(conf);
-        int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table});
+        int loadIncrementalResult = loadIncrementalHFiles.run(new String[] {
+          outputDirectory.getFileSystem(conf).makeQualified(outputDirectory).toString(), table });
         return (loadIncrementalResult == 0);
       } else {
         logger.info("Failed to generate HFiles.");
@@ -146,8 +167,7 @@ public boolean run(Configuration conf, String[] args) throws IOException {
     }
   }
 
-  protected Job createSubmittableJob(Configuration conf)
-      throws IOException {
+  protected Job createSubmittableJob(Configuration conf) throws IOException {
 
     conf.setInt(BulkDataGeneratorMapper.SPLIT_COUNT_KEY, splitCount);
     conf.setInt(BulkDataGeneratorInputFormat.MAPPER_TASK_COUNT_KEY, mapperCount);
@@ -167,30 +187,28 @@ protected Job createSubmittableJob(Configuration conf)
     return job;
   }
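  // The middle of createSubmittableJob() is elided by the hunk above. For
  // orientation, a minimal sketch of how an HFile-producing job of this shape
  // is typically wired (assumed wiring, not the patch's exact code; 'table' and
  // the output path are the tool's own fields):
  //
  //   Job job = Job.getInstance(conf, BulkDataGeneratorTool.class.getSimpleName() + " - " + table);
  //   job.setJarByClass(BulkDataGeneratorTool.class);
  //   job.setInputFormatClass(BulkDataGeneratorInputFormat.class);
  //   job.setMapperClass(BulkDataGeneratorMapper.class);
  //   job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  //   job.setMapOutputValueClass(KeyValue.class);
  //   FileOutputFormat.setOutputPath(job, outputDirectory);
  //   try (Connection connection = ConnectionFactory.createConnection(conf);
  //       RegionLocator locator = connection.getRegionLocator(TableName.valueOf(table))) {
  //     // Aligns the job's partitioner and HFile writers with the table's region boundaries
  //     HFileOutputFormat2.configureIncrementalLoad(job,
  //       connection.getTable(TableName.valueOf(table)).getDescriptor(), locator);
  //   }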
-  /**
-   * Get the random output directory path where HFiles will be generated
-   * @return
-   */
+  /** Returns the random output directory path where HFiles will be generated */
   protected Path generateOutputDirectory() {
-    final String outputDirectory = OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis();
+    final String outputDirectory =
+      OUTPUT_DIRECTORY_PREFIX + "/" + table + "-" + System.currentTimeMillis();
     return new Path(outputDirectory);
   }
 
   /**
    * This method parses the command line parameters into instance variables
-   * @throws ParseException
    */
   protected void readCommandLineParameters(Configuration conf, CommandLine line)
-      throws ParseException, IOException {
+    throws ParseException, IOException {
     final List<String> genericParameters = new ArrayList<>();
 
-    //Parse the generic options
+    // Parse the generic options
     for (Map.Entry<Object, Object> entry : line.getOptionProperties("D").entrySet()) {
       genericParameters.add("-D");
       genericParameters.add(entry.getKey() + "=" + entry.getValue());
     }
 
-    logger.info("Parsed generic parameters: " + Arrays.toString(genericParameters.toArray(new String[0])));
+    logger.info(
+      "Parsed generic parameters: " + Arrays.toString(genericParameters.toArray(new String[0])));
 
     new GenericOptionsParser(conf, genericParameters.toArray(new String[0]));
 
@@ -199,7 +217,8 @@ protected void readCommandLineParameters(Configuration conf, CommandLine line)
     mapperCount = Integer.parseInt(line.getOptionValue("mapper-count"));
     Preconditions.checkArgument(mapperCount > 0, "Mapper count must be greater than 0");
     splitCount = Integer.parseInt(line.getOptionValue("split-count"));
-    Preconditions.checkArgument((splitCount > 0) && (splitCount < Utility.MAX_SPLIT_COUNT), "Split count must be greater than 0 and less than " + Utility.MAX_SPLIT_COUNT);
+    Preconditions.checkArgument((splitCount > 0) && (splitCount < Utility.MAX_SPLIT_COUNT),
+      "Split count must be greater than 0 and less than " + Utility.MAX_SPLIT_COUNT);
     rowsPerMapper = Long.parseLong(line.getOptionValue("rows-per-mapper"));
     Preconditions.checkArgument(rowsPerMapper > 0, "Rows per mapper must be greater than 0");
     deleteTableIfExist = line.hasOption("delete-if-exist");
@@ -208,8 +227,8 @@ protected void readCommandLineParameters(Configuration conf, CommandLine line)
 
   private void parseTableOptions(final CommandLine line) {
     final String tableOptionsAsString = line.getOptionValue("table-options");
-    if(!StringUtils.isEmpty(tableOptionsAsString)) {
-      for(String tableOption : tableOptionsAsString.split(",")) {
+    if (!StringUtils.isEmpty(tableOptionsAsString)) {
+      for (String tableOption : tableOptionsAsString.split(",")) {
         final String[] keyValueSplit = tableOption.split("=");
         final String key = keyValueSplit[0];
         final String value = keyValueSplit[1];
@@ -218,37 +237,34 @@ private void parseTableOptions(final CommandLine line) {
     }
   }
 
-  /**
-   * @return the command line options required by the sor job.
-   */
+  /** Returns the command line options for {@link BulkDataGeneratorTool} */
   protected Options getOptions() {
     final Options options = new Options();
-    Option option = new Option("t", "table", true,
-      "The table name for which data need to be generated.");
+    Option option =
+      new Option("t", "table", true, "The table name for which data needs to be generated.");
     options.addOption(option);
 
     option = new Option("d", "delete-if-exist", false,
-      "If it's set, the table will be deleted if already exist.");
+      "If set, the table will be deleted if it already exists.");
     options.addOption(option);
 
-    option = new Option("mc", "mapper-count", true,
-      "The number of mapper containers to be launched.");
+    option =
+      new Option("mc", "mapper-count", true, "The number of mapper containers to be launched.");
     options.addOption(option);
 
     option = new Option("sc", "split-count", true,
-      "The number of regions/pre-splits to be created for the table.");
+      "The number of regions/pre-splits to be created for the table.");
     options.addOption(option);
 
-    option = new Option("r", "rows-per-mapper", true,
-      "The number of rows to be generated PER mapper.");
+    option =
+      new Option("r", "rows-per-mapper", true, "The number of rows to be generated PER mapper.");
     options.addOption(option);
 
-    option = new Option("o", "table-options", true,
-      "Table options to be set while creating the table.");
+    option =
+      new Option("o", "table-options", true, "Table options to be set while creating the table.");
     options.addOption(option);
 
-    option = new Option("h", "help", false,
-      "Show help message for the tool");
+    option = new Option("h", "help", false, "Show help message for the tool");
     options.addOption(option);
 
     return options;
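  // To make the -o format concrete (values borrowed from the usage examples
  // below): parseTableOptions() turns
  //   -o "DISABLE_BACKUP=true,NORMALIZATION_ENABLED=false"
  // into tableOptions = {DISABLE_BACKUP=true, NORMALIZATION_ENABLED=false},
  // and Utility.createTable() later applies each entry through
  // TableDescriptorBuilder.setValue(key, value).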
@@ -259,10 +275,11 @@ protected void printUsage() {
     helpFormatter.setWidth(120);
     final String helpMessageCommand = "hbase " + BulkDataGeneratorTool.class.getName();
     final String commandSyntax = helpMessageCommand + " [-D]*";
-    final String helpMessageSuffix = "Examples:\n"
-      + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10\n"
-      + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -d -o \"DISABLE_BACKUP=true,NORMALIZATION_ENABLED=false\"\n"
-      + helpMessageCommand + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -Dmapreduce.map.memory.mb=8192 -Dmapreduce.map.java.opts=-Xmx7782m\n";
+    final String helpMessageSuffix = "Examples:\n" + helpMessageCommand
+      + " -t TEST_TABLE -mc 10 -r 100 -sc 10\n" + helpMessageCommand
+      + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -d -o \"DISABLE_BACKUP=true,NORMALIZATION_ENABLED=false\"\n"
+      + helpMessageCommand
+      + " -t TEST_TABLE -mc 10 -r 100 -sc 10 -Dmapreduce.map.memory.mb=8192 -Dmapreduce.map.java.opts=-Xmx7782m\n";
     helpFormatter.printHelp(commandSyntax, "", getOptions(), helpMessageSuffix);
   }
 }
diff --git a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java
index 8fcc2936e610..cc5488224cab 100644
--- a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java
+++ b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/util/bulkdatagenerator/Utility.java
@@ -1,34 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hbase.util.bulkdatagenerator;
 
+import java.io.IOException;
+import java.util.Map;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
-import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
-import java.io.IOException;
-import java.util.Map;
+import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 
 public class Utility {
 
   /**
-   * Schema for HBase table to be generated by generated and populated by {@link BulkDataGeneratorTool}
+   * Schema for the HBase table to be generated and populated by
+   * {@link BulkDataGeneratorTool}
    */
   public enum TableColumnNames {
-    ORG_ID ("orgId".getBytes()),
-    TOOL_EVENT_ID ("toolEventId".getBytes()),
-    EVENT_ID ("eventId".getBytes()),
-    VEHICLE_ID ("vehicleId".getBytes()),
-    SPEED ("speed".getBytes()),
-    LATITUDE ("latitude".getBytes()),
-    LONGITUDE ("longitude".getBytes()),
-    LOCATION ("location".getBytes()),
-    TIMESTAMP ("timestamp".getBytes());
+    ORG_ID("orgId".getBytes()),
+    TOOL_EVENT_ID("toolEventId".getBytes()),
+    EVENT_ID("eventId".getBytes()),
+    VEHICLE_ID("vehicleId".getBytes()),
+    SPEED("speed".getBytes()),
+    LATITUDE("latitude".getBytes()),
+    LONGITUDE("longitude".getBytes()),
+    LOCATION("location".getBytes()),
+    TIMESTAMP("timestamp".getBytes());
 
     private final byte[] columnName;
 
-    TableColumnNames (byte[] column) {
+    TableColumnNames(byte[] column) {
       this.columnName = column;
     }
 
@@ -49,23 +67,28 @@ public static void deleteTable(Admin admin, String tableName) throws IOException
   }
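  // Utility.deleteTable()'s body is not shown in this hunk; a sketch of the
  // usual disable-then-delete sequence it implies (an assumption, not the
  // patch's exact code):
  //
  //   TableName tn = TableName.valueOf(tableName);
  //   if (admin.isTableEnabled(tn)) {
  //     admin.disableTable(tn); // HBase requires a table to be disabled before deletion
  //   }
  //   admin.deleteTable(tn);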
 
   /**
-   * Creates a pre-splitted HBase Table having single column family ({@link #COLUMN_FAMILY}) and sequential splits with {@link #SPLIT_PREFIX_LENGTH} length character prefix.
-   * Example: If a table (TEST_TABLE_1) need to be generated with splitCount as 10, table would be created with (10+1) regions with boundaries end-keys as (000000-000001, 000001-000002, 000002-000003, ...., 0000010-)
-   * @param admin - Admin object associated with HBase connection
-   * @param tableName - Name of table to be created
-   * @param splitCount - Number of splits for the table (Number of regions will be splitCount + 1)
+   * Creates a pre-split HBase table having a single column family ({@link #COLUMN_FAMILY}) and
+   * sequential splits with a {@link #SPLIT_PREFIX_LENGTH}-character prefix. Example: if a
+   * table (TEST_TABLE_1) needs to be generated with splitCount as 10, the table would be created
+   * with (10+1) regions with boundary end-keys (000000-000001, 000001-000002, 000002-000003, ....,
+   * 0000010-)
+   * @param admin        - Admin object associated with HBase connection
+   * @param tableName    - Name of table to be created
+   * @param splitCount   - Number of splits for the table (Number of regions will be splitCount + 1)
    * @param tableOptions - Additional HBase metadata properties to be set for the table
-   * @throws IOException
    */
-  public static void createTable(Admin admin, String tableName, int splitCount, Map<String, String> tableOptions) throws IOException {
+  public static void createTable(Admin admin, String tableName, int splitCount,
+    Map<String, String> tableOptions) throws IOException {
     Preconditions.checkArgument(splitCount > 0, "Split count must be greater than 0");
-    TableDescriptorBuilder tableDescriptorBuilder = TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
+    TableDescriptorBuilder tableDescriptorBuilder =
+      TableDescriptorBuilder.newBuilder(TableName.valueOf(tableName));
     tableOptions.forEach(tableDescriptorBuilder::setValue);
-    TableDescriptor tableDescriptor = tableDescriptorBuilder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
+    TableDescriptor tableDescriptor = tableDescriptorBuilder
+      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
     // Pre-splitting table based on splitCount
     byte[][] splitKeys = new byte[splitCount][];
-    for(int i = 0; i
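  // The split-key loop above is cut off. A sketch of the generation the javadoc
  // describes, assuming SPLIT_PREFIX_LENGTH is the fixed key width (6, matching
  // the 000001-style keys in the example) rather than the patch's exact code:
  //
  //   byte[][] splitKeys = new byte[splitCount][];
  //   for (int i = 0; i < splitCount; i++) {
  //     // 000001, 000002, ..., giving (splitCount + 1) regions
  //     splitKeys[i] = String.format("%0" + SPLIT_PREFIX_LENGTH + "d", i + 1).getBytes();
  //   }
  //   admin.createTable(tableDescriptor, splitKeys);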
From: Himanshu Gwalani
Date: Thu, 8 Jun 2023 23:46:54 +0530
Subject: [PATCH 3/3] Removing unwanted pom changes

---
 hbase-mapreduce/.flattened-pom.xml | 1179 ----------------------------
 1 file changed, 1179 deletions(-)
 delete mode 100644 hbase-mapreduce/.flattened-pom.xml

diff --git a/hbase-mapreduce/.flattened-pom.xml b/hbase-mapreduce/.flattened-pom.xml
deleted file mode 100644
index 2c60e3aa8f64..000000000000
--- a/hbase-mapreduce/.flattened-pom.xml
+++ /dev/null
@@ -1,1179 +0,0 @@
-[1,179 deleted lines of the auto-generated flattened POM (module metadata,
- developer and mailing-list rosters, and dependency/repository declarations)
- omitted]