Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-28457 Introduce a version field in file based tracker record #5784

Merged
merged 1 commit into from
Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ message StoreFileEntry {
message StoreFileList {
required uint64 timestamp = 1;
repeated StoreFileEntry store_file = 2;
optional uint64 version = 3 [default = 1];
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
*/
package org.apache.hadoop.hbase.regionserver.storefiletracker;

import com.google.errorprone.annotations.RestrictedApi;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
Expand Down Expand Up @@ -59,19 +61,28 @@
* without error on partial bytes if you stop at some special points, but the return message will
* have incorrect field value. We should try our best to prevent this happens because loading an
* incorrect store file list file usually leads to data loss.
* <p/>
* To prevent failing silently while downgrading, where we may miss some newly introduced fields in
* {@link StoreFileList} which are necessary, we introduce a 'version' field in
* {@link StoreFileList}. If we find out that we are reading a {@link StoreFileList} with higher
* version, we will fail immediately and tell users that you need extra steps while downgrading, to
* prevent potential data loss.
*/
@InterfaceAudience.Private
class StoreFileListFile {

private static final Logger LOG = LoggerFactory.getLogger(StoreFileListFile.class);

// the current version for StoreFileList
static final long VERSION = 1;

static final String TRACK_FILE_DIR = ".filelist";

private static final String TRACK_FILE_PREFIX = "f1";
static final String TRACK_FILE_PREFIX = "f1";

private static final String TRACK_FILE_ROTATE_PREFIX = "f2";

private static final char TRACK_FILE_SEPARATOR = '.';
static final char TRACK_FILE_SEPARATOR = '.';

static final Pattern TRACK_FILE_PATTERN = Pattern.compile("^f(1|2)\\.\\d+$");

Expand Down Expand Up @@ -114,7 +125,18 @@ static StoreFileList load(FileSystem fs, Path path) throws IOException {
throw new IOException(
"Checksum mismatch, expected " + expectedChecksum + ", actual " + calculatedChecksum);
}
return StoreFileList.parseFrom(data);
StoreFileList storeFileList = StoreFileList.parseFrom(data);
if (storeFileList.getVersion() > VERSION) {
LOG.error(
"The loaded store file list is in version {}, which is higher than expected"
+ " version {}. Stop loading to prevent potential data loss. This usually because your"
+ " cluster is downgraded from a newer version. You need extra steps before downgrading,"
+ " like switching back to default store file tracker.",
storeFileList.getVersion(), VERSION);
throw new IOException("Higher store file list version detected, expected " + VERSION
+ ", got " + storeFileList.getVersion());
}
return storeFileList;
}

StoreFileList load(Path path) throws IOException {
Expand Down Expand Up @@ -145,7 +167,7 @@ private NavigableMap<Long, List<Path>> listFiles() throws IOException {
if (statuses == null || statuses.length == 0) {
return Collections.emptyNavigableMap();
}
TreeMap<Long, List<Path>> map = new TreeMap<>((l1, l2) -> l2.compareTo(l1));
TreeMap<Long, List<Path>> map = new TreeMap<>(Comparator.reverseOrder());
for (FileStatus status : statuses) {
Path file = status.getPath();
if (!status.isFile()) {
Expand Down Expand Up @@ -232,31 +254,39 @@ StoreFileList load(boolean readOnly) throws IOException {
return lists[winnerIndex];
}

@RestrictedApi(explanation = "Should only be called in tests", link = "",
allowedOnPath = ".*/StoreFileListFile.java|.*/src/test/.*")
static void write(FileSystem fs, Path file, StoreFileList storeFileList) throws IOException {
byte[] data = storeFileList.toByteArray();
CRC32 crc32 = new CRC32();
crc32.update(data);
int checksum = (int) crc32.getValue();
// 4 bytes length at the beginning, plus 4 bytes checksum
try (FSDataOutputStream out = fs.create(file, true)) {
out.writeInt(data.length);
out.write(data);
out.writeInt(checksum);
}
}

/**
* We will set the timestamp in this method so just pass the builder in
* We will set the timestamp and version in this method so just pass the builder in
*/
void update(StoreFileList.Builder builder) throws IOException {
if (nextTrackFile < 0) {
// we need to call load first to load the prevTimestamp and also the next file
// we are already in the update method, which is not read only, so pass false
load(false);
}
long timestamp = Math.max(prevTimestamp + 1, EnvironmentEdgeManager.currentTime());
byte[] actualData = builder.setTimestamp(timestamp).build().toByteArray();
CRC32 crc32 = new CRC32();
crc32.update(actualData);
int checksum = (int) crc32.getValue();
// 4 bytes length at the beginning, plus 4 bytes checksum
FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
try (FSDataOutputStream out = fs.create(trackFiles[nextTrackFile], true)) {
out.writeInt(actualData.length);
out.write(actualData);
out.writeInt(checksum);
}
long timestamp = Math.max(prevTimestamp + 1, EnvironmentEdgeManager.currentTime());
write(fs, trackFiles[nextTrackFile],
builder.setTimestamp(timestamp).setVersion(VERSION).build());
// record timestamp
prevTimestamp = timestamp;
// rotate the file
nextTrackFile = 1 - nextTrackFile;

try {
fs.delete(trackFiles[nextTrackFile], false);
} catch (IOException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
Expand Down Expand Up @@ -222,4 +223,20 @@ public void testConcurrentUpdate() throws IOException {
assertEquals("hehe", entry.getName());
assertEquals(10, entry.getSize());
}

@Test
public void testLoadHigherVersion() throws IOException {
// write a fake StoreFileList file with higher version
StoreFileList storeFileList =
StoreFileList.newBuilder().setVersion(StoreFileListFile.VERSION + 1)
.setTimestamp(EnvironmentEdgeManager.currentTime()).build();
Path trackFileDir = new Path(testDir, StoreFileListFile.TRACK_FILE_DIR);
StoreFileListFile.write(FileSystem.get(UTIL.getConfiguration()),
new Path(trackFileDir, StoreFileListFile.TRACK_FILE_PREFIX
+ StoreFileListFile.TRACK_FILE_SEPARATOR + EnvironmentEdgeManager.currentTime()),
storeFileList);
IOException error = assertThrows(IOException.class, () -> create().load(false));
assertEquals("Higher store file list version detected, expected " + StoreFileListFile.VERSION
+ ", got " + (StoreFileListFile.VERSION + 1), error.getMessage());
}
}