HBASE-25318 Config option for IntegrationTestImportTsv where to generate HFiles to bulkload (#2777)

IntegrationTestImportTsv generates HFiles under the working directory of the
current HDFS user executing the tool, before bulkloading them into HBase.

Assuming you encrypt the HBase root directory within HDFS (using HDFS
Transparent Encryption), you can bulkload HFiles only if they sit in the same
encryption zone in HDFS as the HBase root directory itself.
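This precondition can be verified up front with the HDFS client API. The following is a
minimal sketch, not part of this commit; the cluster URI and both paths are only examples
and have to match your own deployment:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;

public class EncryptionZoneCheck {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    HdfsAdmin admin = new HdfsAdmin(URI.create("hdfs://mycluster"), conf);

    // getEncryptionZoneForPath returns null when the path is not inside any encryption zone.
    EncryptionZone rootZone = admin.getEncryptionZoneForPath(new Path("/hbase"));
    EncryptionZone hfileZone = admin.getEncryptionZoneForPath(new Path("/user/hbase/test-data"));

    boolean sameZone = rootZone != null && hfileZone != null
      && rootZone.getPath().equals(hfileZone.getPath());
    System.out.println("HFile folder and HBase root share an encryption zone: " + sameZone);
  }
}
```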

When IntegrationTestImportTsv is executed against a real distributed cluster
and the working directory of the current user (e.g. /user/hbase) is not in the
same encryption zone as the HBase root directory (e.g. /hbase/data), the
bulkload fails with an exception:

```
ERROR org.apache.hadoop.hbase.regionserver.HRegion: There was a partial failure
due to IO when attempting to load d :
hdfs://mycluster/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc

org.apache.hadoop.ipc.RemoteException(java.io.IOException):
/user/hbase/test-data/22d8460d-04cc-e032-88ca-2cc20a7dd01c/
IntegrationTestImportTsv/hfiles/d/74655e3f8da142cb94bc31b64f0475cc
can't be moved into an encryption zone.
```

This commit makes the folder where IntegrationTestImportTsv generates the
HFiles configurable.
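With the new option, the base folder can be pointed at a location inside the HBase
encryption zone, either via '-DIntegrationTestImportTsv.generatedHFileFolder=<path>' on the
command line or programmatically. Below is a rough sketch of the programmatic route,
mirroring the wiring in the tool's own main(); the package name and the /hbase/hfile-staging
path are assumptions for illustration, not something this commit introduces:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.mapreduce.IntegrationTestImportTsv;
import org.apache.hadoop.util.ToolRunner;

public class RunImportTsvWithCustomHFileFolder {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    IntegrationTestingUtility.setUseDistributedCluster(conf);
    // Generate the HFiles under a folder that sits in the same HDFS encryption
    // zone as the HBase root directory ("/hbase/hfile-staging" is an example path).
    conf.set("IntegrationTestImportTsv.generatedHFileFolder", "/hbase/hfile-staging");
    int ret = ToolRunner.run(conf, new IntegrationTestImportTsv(), args);
    System.exit(ret);
  }
}
```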

Co-authored-by: Mate Szalay-Beko <symat@apache.com>
Signed-off-by: Peter Somogyi <psomogyi@apache.org>
2 people authored and petersomogyi committed Jan 5, 2021
1 parent 81d17d0 commit 95dc87b
Showing 1 changed file with 33 additions and 2 deletions.
@@ -22,13 +22,15 @@
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.UUID;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -67,6 +69,8 @@ public class IntegrationTestImportTsv extends Configured implements Tool {
 
   private static final String NAME = IntegrationTestImportTsv.class.getSimpleName();
   private static final Logger LOG = LoggerFactory.getLogger(IntegrationTestImportTsv.class);
+  private static final String GENERATED_HFILE_FOLDER_PARAM_KEY =
+      "IntegrationTestImportTsv.generatedHFileFolder";
 
   protected static final String simple_tsv =
       "row1\t1\tc1\tc2\n" +
@@ -191,8 +195,8 @@ public void testGenerateAndLoad() throws Exception {
   void generateAndLoad(final TableName table) throws Exception {
     LOG.info("Running test testGenerateAndLoad.");
     String cf = "d";
-    Path hfiles = new Path(
-        util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    Path hfiles = initGeneratedHFilePath(table);
+    LOG.info("The folder where the HFiles will be generated: {}", hfiles.toString());
 
     Map<String, String> args = new HashMap<>();
     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
@@ -221,6 +225,12 @@ public int run(String[] args) throws Exception {
       System.err.println(format("%s [genericOptions]", NAME));
       System.err.println(" Runs ImportTsv integration tests against a distributed cluster.");
       System.err.println();
+      System.err.println(" Use '-D" + GENERATED_HFILE_FOLDER_PARAM_KEY + "=<path>' to define a");
+      System.err.println(" base folder for the generated HFiles. If HDFS Transparent Encryption");
+      System.err.println(" is configured, then make sure to set this parameter to a folder in");
+      System.err.println(" the same encryption zone in HDFS as the HBase root directory,");
+      System.err.println(" otherwise the bulkload will fail.");
+      System.err.println();
       ToolRunner.printGenericCommandUsage(System.err);
       return 1;
     }
@@ -238,6 +248,27 @@ public int run(String[] args) throws Exception {
     return 0;
   }
 
+  private Path initGeneratedHFilePath(final TableName table) throws IOException {
+    String folderParam = getConf().getTrimmed(GENERATED_HFILE_FOLDER_PARAM_KEY);
+    if (folderParam == null || folderParam.isEmpty()) {
+      // by default, fall back to the test data dir
+      return new Path(util.getDataTestDirOnTestFS(table.getNameAsString()), "hfiles");
+    }
+
+    Path hfiles = new Path(folderParam, UUID.randomUUID().toString());
+    FileSystem fs = util.getTestFileSystem();
+    String shouldPreserve = System.getProperty("hbase.testing.preserve.testdir", "false");
+    if (!Boolean.parseBoolean(shouldPreserve)) {
+      if (fs.getUri().getScheme().equals(FileSystem.getLocal(getConf()).getUri().getScheme())) {
+        File localFoler = new File(hfiles.toString());
+        localFoler.deleteOnExit();
+      } else {
+        fs.deleteOnExit(hfiles);
+      }
+    }
+    return hfiles;
+  }
+
   public static void main(String[] args) throws Exception {
     Configuration conf = HBaseConfiguration.create();
     IntegrationTestingUtility.setUseDistributedCluster(conf);
