Skip to content

Commit

Permalink
Variant to HBase load: allow to specify hadoop cluster
Browse files Browse the repository at this point in the history
Use output option to specify hadoop cluster url (issue #11)
  • Loading branch information
Matthias Haimel committed May 21, 2015
1 parent ba7fceb commit 2fc0ec4
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -492,8 +494,12 @@ private void cram2avro(String input, String output, String codecName) throws IOE

private void variant2hbase(String input, String output) throws Exception {
Variant2HbaseMR mr = new Variant2HbaseMR();
String[] args = new String[]{"-i",input,"-t","VariantLoad"};
int run = ToolRunner.run(mr, args);
List<String> args = new ArrayList<String>(Arrays.asList(new String[]{"-i",input,"-t","VariantLoad"}));
if(StringUtils.isNotBlank(output)){
args.add("-o");
args.add(output);
}
int run = ToolRunner.run(mr, args.toArray(new String[0]));
if(run != 0)
throw new IllegalStateException(String.format("Variant 2 HBase finished with %s !", run));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
Expand Down Expand Up @@ -155,11 +156,14 @@ public Configuration getConf() {
public int run(String[] args) throws Exception {
String tablename = "test_table";
String inputfile = null;
String output = null;
for(int i = 0; i < args.length; ++i){
if(args[i] == "-t")
tablename = args[++i];
if(args[i] == "-i")
inputfile = args[++i];
if(args[i] == "-o")
output = args[++i];
}

setConf(HBaseConfiguration.addHbaseResources(getConf()));
Expand All @@ -172,19 +176,27 @@ public int run(String[] args) throws Exception {
AvroJob.setInputKeySchema(job, Variant.getClassSchema());
FileInputFormat.setInputPaths(job, new Path(inputfile));
job.setInputFormatClass(AvroKeyInputFormat.class);


// output -> Hbase
TableMapReduceUtil.initTableReducerJob(tablename, null, job);
job.setNumReduceTasks(0); // Write to table directory
if(StringUtils.isNotBlank(output)){
Configuration conf = getConf();
conf.set("hbase.zookeeper.quorum", output);
conf.set("hbase.master", output+":60000");
setConf(conf);
}

// mapper
job.setMapperClass(Variant2HbaseMR.class);

// create Table if needed
createTableIfNeeded(tablename);

return job.waitForCompletion(true)?0:1;
long start = System.currentTimeMillis();
boolean completed = job.waitForCompletion(true);
long end = System.currentTimeMillis();
getLog().info(String.format("Loading run for %s ms!", (end-start)));
return completed?0:1;
}

/**
Expand Down
9 changes: 8 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>

<!-- General dependencies -->
<dependency>
<groupId>com.google.guava</groupId>
Expand Down Expand Up @@ -172,6 +172,13 @@
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>jdk.tools</groupId>
<artifactId>jdk.tools</artifactId>
<version>1.6</version>
<scope>system</scope>
<systemPath>${java.home}/lib/tools.jar</systemPath>
</dependency>
</dependencies>
</dependencyManagement>

Expand Down

0 comments on commit 2fc0ec4

Please sign in to comment.