Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-27125 The batch size of cleaning expired mob files should have an upper bound #4541

Merged
merged 1 commit into from
Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public final class MobConstants {
public static final String MOB_CACHE_EVICT_REMAIN_RATIO = "hbase.mob.cache.evict.remain.ratio";
public static final Tag MOB_REF_TAG =
new ArrayBackedTag(TagType.MOB_REFERENCE_TAG_TYPE, HConstants.EMPTY_BYTE_ARRAY);
public static final String MOB_CLEANER_BATCH_SIZE_UPPER_BOUND =
"hbase.master.mob.cleaner.batch.size.upper.bound";
public static final int DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND = 10000;

public static final float DEFAULT_EVICT_REMAIN_RATIO = 0.5f;
public static final long DEFAULT_MOB_CACHE_EVICT_PERIOD = 3600L;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hbase.mob;

import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -314,20 +317,30 @@ public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, Table
}
filesToClean
.add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
if (
filesToClean.size() >= conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND,
DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND)
) {
if (
removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(),
filesToClean)
) {
deletedFileCount += filesToClean.size();
}
filesToClean.clear();
}
}
} catch (Exception e) {
LOG.error("Cannot parse the fileName " + fileName, e);
}
}
if (!filesToClean.isEmpty()) {
sunhelly marked this conversation as resolved.
Show resolved Hide resolved
try {
removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(), filesToClean);
deletedFileCount = filesToClean.size();
} catch (IOException e) {
LOG.error("Failed to delete the mob files " + filesToClean, e);
}
if (
!filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir,
columnDescriptor.getName(), filesToClean)
) {
deletedFileCount += filesToClean.size();
}
LOG.info("{} expired mob files are deleted", deletedFileCount);
sunhelly marked this conversation as resolved.
Show resolved Hide resolved
LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount);
}

/**
Expand Down Expand Up @@ -459,10 +472,17 @@ public static boolean isMobRegionName(TableName tableName, byte[] regionName) {
* @param family The name of the column family.
* @param storeFiles The files to be deleted.
*/
public static void removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) throws IOException {
HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
storeFiles);
public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) {
try {
HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
storeFiles);
LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size());
return true;
} catch (IOException e) {
LOG.error("Failed to delete the mob files, table {}", tableName, e);
}
return false;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.mob;

import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
import static org.junit.Assert.assertEquals;

import org.apache.hadoop.fs.FileStatus;
Expand Down Expand Up @@ -55,6 +56,7 @@ public class TestExpiredMobFileCleaner {
private final static String family = "family";
private final static byte[] row1 = Bytes.toBytes("row1");
private final static byte[] row2 = Bytes.toBytes("row2");
private final static byte[] row3 = Bytes.toBytes("row3");
private final static byte[] qf = Bytes.toBytes("qf");

private static BufferedMutator table;
Expand All @@ -63,6 +65,7 @@ public class TestExpiredMobFileCleaner {
@BeforeClass
public static void setUpBeforeClass() throws Exception {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
TEST_UTIL.getConfiguration().setInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND, 2);
}

@AfterClass
Expand Down Expand Up @@ -146,6 +149,14 @@ public void testCleaner() throws Exception {
String f2 = secondFiles[1].getPath().getName();
String secondFile = f1.equals(firstFile) ? f2 : f1;

ts = EnvironmentEdgeManager.currentTime() - 4 * secondsOfDay() * 1000; // 4 days before
putKVAndFlush(table, row3, dummyData, ts);
ts = EnvironmentEdgeManager.currentTime() - 4 * secondsOfDay() * 1000; // 4 days before
putKVAndFlush(table, row3, dummyData, ts);
FileStatus[] thirdFiles = TEST_UTIL.getTestFileSystem().listStatus(mobDirPath);
// now there are 4 mob files
assertEquals("Before cleanup without delay 3", 4, thirdFiles.length);

modifyColumnExpiryDays(2); // ttl = 2, make the first row expired

// run the cleaner
Expand All @@ -156,7 +167,7 @@ public void testCleaner() throws Exception {

FileStatus[] filesAfterClean = TEST_UTIL.getTestFileSystem().listStatus(mobDirPath);
String lastFile = filesAfterClean[0].getPath().getName();
// the first mob fie is removed
// there are 4 mob files in total, but only 3 need to be cleaned
assertEquals("After cleanup without delay 1", 1, filesAfterClean.length);
assertEquals("After cleanup without delay 2", secondFile, lastFile);
}
Expand Down