Skip to content

Commit 1862064

Browse files
committed
HDFS-7185. The active NameNode will not accept an fsimage sent from the standby during rolling upgrade. Contributed by Jing Zhao.
1 parent b9edad6 commit 1862064

File tree

7 files changed: 87 additions (+87) and 26 deletions (-26)

hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,9 @@ Release 2.6.0 - UNRELEASED
956956
HDFS-7237. The command "hdfs namenode -rollingUpgrade" throws
957957
ArrayIndexOutOfBoundsException. (szetszwo)
958958

959+
HDFS-7185. The active NameNode will not accept an fsimage sent from the
960+
standby during rolling upgrade. (jing9)
961+
959962
BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
960963

961964
HDFS-6387. HDFS CLI admin tool for creating & deleting an

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ private boolean recoverStorageDirs(StartupOption startOpt,
322322
if (curState != StorageState.NOT_FORMATTED
323323
&& startOpt != StartupOption.ROLLBACK) {
324324
// read and verify consistency with other directories
325-
storage.readProperties(sd);
325+
storage.readProperties(sd, startOpt);
326326
isFormatted = true;
327327
}
328328
if (startOpt == StartupOption.IMPORT && isFormatted)
@@ -563,7 +563,7 @@ void openEditLogForWrite() throws IOException {
563563
assert editLog != null : "editLog must be initialized";
564564
editLog.openForWrite();
565565
storage.writeTransactionIdFileToStorage(editLog.getCurSegmentTxId());
566-
};
566+
}
567567

568568
/**
569569
* Toss the current image and namesystem, reloading from the specified
@@ -572,7 +572,7 @@ void openEditLogForWrite() throws IOException {
572572
void reloadFromImageFile(File file, FSNamesystem target) throws IOException {
573573
target.clear();
574574
LOG.debug("Reloading namespace from " + file);
575-
loadFSImage(file, target, null);
575+
loadFSImage(file, target, null, false);
576576
}
577577

578578
/**
@@ -603,7 +603,8 @@ private boolean loadFSImage(FSNamesystem target, StartupOption startOpt,
603603
// otherwise we can load from both IMAGE and IMAGE_ROLLBACK
604604
nnfs = EnumSet.of(NameNodeFile.IMAGE, NameNodeFile.IMAGE_ROLLBACK);
605605
}
606-
final FSImageStorageInspector inspector = storage.readAndInspectDirs(nnfs);
606+
final FSImageStorageInspector inspector = storage
607+
.readAndInspectDirs(nnfs, startOpt);
607608

608609
isUpgradeFinalized = inspector.isUpgradeFinalized();
609610
List<FSImageFile> imageFiles = inspector.getLatestImages();
@@ -659,7 +660,7 @@ LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
659660
for (int i = 0; i < imageFiles.size(); i++) {
660661
try {
661662
imageFile = imageFiles.get(i);
662-
loadFSImageFile(target, recovery, imageFile);
663+
loadFSImageFile(target, recovery, imageFile, startOpt);
663664
break;
664665
} catch (IOException ioe) {
665666
LOG.error("Failed to load image from " + imageFile, ioe);
@@ -712,16 +713,18 @@ private void rollingRollback(long discardSegmentTxId, long ckptId)
712713
}
713714

714715
void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery,
715-
FSImageFile imageFile) throws IOException {
716+
FSImageFile imageFile, StartupOption startupOption) throws IOException {
716717
LOG.debug("Planning to load image :\n" + imageFile);
717718
StorageDirectory sdForProperties = imageFile.sd;
718-
storage.readProperties(sdForProperties);
719+
storage.readProperties(sdForProperties, startupOption);
719720

720721
if (NameNodeLayoutVersion.supports(
721722
LayoutVersion.Feature.TXID_BASED_LAYOUT, getLayoutVersion())) {
722723
// For txid-based layout, we should have a .md5 file
723724
// next to the image file
724-
loadFSImage(imageFile.getFile(), target, recovery);
725+
boolean isRollingRollback = RollingUpgradeStartupOption.ROLLBACK
726+
.matches(startupOption);
727+
loadFSImage(imageFile.getFile(), target, recovery, isRollingRollback);
725728
} else if (NameNodeLayoutVersion.supports(
726729
LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) {
727730
// In 0.22, we have the checksum stored in the VERSION file.
@@ -733,10 +736,11 @@ LayoutVersion.Feature.FSIMAGE_CHECKSUM, getLayoutVersion())) {
733736
NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY +
734737
" not set for storage directory " + sdForProperties.getRoot());
735738
}
736-
loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery);
739+
loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery,
740+
false);
737741
} else {
738742
// We don't have any record of the md5sum
739-
loadFSImage(imageFile.getFile(), null, target, recovery);
743+
loadFSImage(imageFile.getFile(), null, target, recovery, false);
740744
}
741745
}
742746

@@ -894,27 +898,30 @@ private static void updateCountForQuotaRecursively(INodeDirectory dir,
894898
* it against the MD5 sum stored in its associated .md5 file.
895899
*/
896900
private void loadFSImage(File imageFile, FSNamesystem target,
897-
MetaRecoveryContext recovery) throws IOException {
901+
MetaRecoveryContext recovery, boolean requireSameLayoutVersion)
902+
throws IOException {
898903
MD5Hash expectedMD5 = MD5FileUtils.readStoredMd5ForFile(imageFile);
899904
if (expectedMD5 == null) {
900905
throw new IOException("No MD5 file found corresponding to image file "
901906
+ imageFile);
902907
}
903-
loadFSImage(imageFile, expectedMD5, target, recovery);
908+
loadFSImage(imageFile, expectedMD5, target, recovery,
909+
requireSameLayoutVersion);
904910
}
905911

906912
/**
907913
* Load in the filesystem image from file. It's a big list of
908914
* filenames and blocks.
909915
*/
910916
private void loadFSImage(File curFile, MD5Hash expectedMd5,
911-
FSNamesystem target, MetaRecoveryContext recovery) throws IOException {
917+
FSNamesystem target, MetaRecoveryContext recovery,
918+
boolean requireSameLayoutVersion) throws IOException {
912919
// BlockPoolId is required when the FsImageLoader loads the rolling upgrade
913920
// information. Make sure the ID is properly set.
914921
target.setBlockPoolId(this.getBlockPoolID());
915922

916923
FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target);
917-
loader.load(curFile);
924+
loader.load(curFile, requireSameLayoutVersion);
918925

919926
// Check that the image digest we loaded matches up with what
920927
// we expected
@@ -1033,7 +1040,7 @@ public synchronized void updateStorageVersion() throws IOException {
10331040
}
10341041

10351042
/**
1036-
* @see #saveNamespace(FSNamesystem, Canceler)
1043+
* @see #saveNamespace(FSNamesystem, NameNodeFile, Canceler)
10371044
*/
10381045
public synchronized void saveNamespace(FSNamesystem source)
10391046
throws IOException {
@@ -1072,7 +1079,7 @@ public synchronized void saveNamespace(FSNamesystem source, NameNodeFile nnf,
10721079
}
10731080

10741081
/**
1075-
* @see #saveFSImageInAllDirs(FSNamesystem, long, Canceler)
1082+
* @see #saveFSImageInAllDirs(FSNamesystem, NameNodeFile, long, Canceler)
10761083
*/
10771084
protected synchronized void saveFSImageInAllDirs(FSNamesystem source, long txid)
10781085
throws IOException {

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,8 @@ public long getLoadedImageTxId() {
209209
return impl.getLoadedImageTxId();
210210
}
211211

212-
public void load(File file) throws IOException {
212+
public void load(File file, boolean requireSameLayoutVersion)
213+
throws IOException {
213214
Preconditions.checkState(impl == null, "Image already loaded!");
214215

215216
FileInputStream is = null;
@@ -219,15 +220,14 @@ public void load(File file) throws IOException {
219220
IOUtils.readFully(is, magic, 0, magic.length);
220221
if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) {
221222
FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader(
222-
conf, fsn);
223+
conf, fsn, requireSameLayoutVersion);
223224
impl = loader;
224225
loader.load(file);
225226
} else {
226227
Loader loader = new Loader(conf, fsn);
227228
impl = loader;
228229
loader.load(file);
229230
}
230-
231231
} finally {
232232
IOUtils.cleanup(LOG, is);
233233
}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,12 @@
4242
import org.apache.commons.logging.LogFactory;
4343
import org.apache.hadoop.classification.InterfaceAudience;
4444
import org.apache.hadoop.conf.Configuration;
45+
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
4546
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
4647
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
4748
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
49+
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
50+
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
4851
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
4952
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
5053
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
@@ -139,11 +142,19 @@ public static final class Loader implements FSImageFormat.AbstractLoader {
139142
private MD5Hash imgDigest;
140143
/** The transaction ID of the last edit represented by the loaded file */
141144
private long imgTxId;
142-
143-
Loader(Configuration conf, FSNamesystem fsn) {
145+
/**
146+
* Whether the image's layout version must be the same as
147+
* {@link HdfsConstants#NAMENODE_LAYOUT_VERSION}. This is only set to true
148+
* when we're doing a rollingUpgrade rollback.
149+
*/
150+
private final boolean requireSameLayoutVersion;
151+
152+
Loader(Configuration conf, FSNamesystem fsn,
153+
boolean requireSameLayoutVersion) {
144154
this.conf = conf;
145155
this.fsn = fsn;
146156
this.ctx = new LoaderContext();
157+
this.requireSameLayoutVersion = requireSameLayoutVersion;
147158
}
148159

149160
@Override
@@ -181,6 +192,12 @@ private void loadInternal(RandomAccessFile raFile, FileInputStream fin)
181192
throw new IOException("Unrecognized file format");
182193
}
183194
FileSummary summary = FSImageUtil.loadSummary(raFile);
195+
if (requireSameLayoutVersion && summary.getLayoutVersion() !=
196+
HdfsConstants.NAMENODE_LAYOUT_VERSION) {
197+
throw new IOException("Image version " + summary.getLayoutVersion() +
198+
" is not equal to the software version " +
199+
HdfsConstants.NAMENODE_LAYOUT_VERSION);
200+
}
184201

185202
FileChannel channel = fin.getChannel();
186203

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,8 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
10211021
MetaRecoveryContext recovery = startOpt.createRecoveryContext();
10221022
final boolean staleImage
10231023
= fsImage.recoverTransitionRead(startOpt, this, recovery);
1024-
if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt)) {
1024+
if (RollingUpgradeStartupOption.ROLLBACK.matches(startOpt) ||
1025+
RollingUpgradeStartupOption.DOWNGRADE.matches(startOpt)) {
10251026
rollingUpgradeInfo = null;
10261027
}
10271028
final boolean needToSave = staleImage && !haEnabled && !isRollingUpgrade();
@@ -1031,6 +1032,8 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
10311032
if (needToSave) {
10321033
fsImage.saveNamespace(this);
10331034
} else {
1035+
updateStorageVersionForRollingUpgrade(fsImage.getLayoutVersion(),
1036+
startOpt);
10341037
// No need to save, so mark the phase done.
10351038
StartupProgress prog = NameNode.getStartupProgress();
10361039
prog.beginPhase(Phase.SAVING_CHECKPOINT);
@@ -1052,6 +1055,18 @@ private void loadFSImage(StartupOption startOpt) throws IOException {
10521055
imageLoadComplete();
10531056
}
10541057

1058+
private void updateStorageVersionForRollingUpgrade(final long layoutVersion,
1059+
StartupOption startOpt) throws IOException {
1060+
boolean rollingStarted = RollingUpgradeStartupOption.STARTED
1061+
.matches(startOpt) && layoutVersion > HdfsConstants
1062+
.NAMENODE_LAYOUT_VERSION;
1063+
boolean rollingRollback = RollingUpgradeStartupOption.ROLLBACK
1064+
.matches(startOpt);
1065+
if (rollingRollback || rollingStarted) {
1066+
fsImage.updateStorageVersion();
1067+
}
1068+
}
1069+
10551070
private void startSecretManager() {
10561071
if (dtSecretManager != null) {
10571072
try {

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@
3939
import org.apache.hadoop.hdfs.DFSUtil;
4040
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
4141
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
42+
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
4243
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
4344
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
4445
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
46+
import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
4547
import org.apache.hadoop.hdfs.server.common.Storage;
4648
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
4749
import org.apache.hadoop.hdfs.server.common.Util;
@@ -620,6 +622,23 @@ LayoutVersion.Feature.FEDERATION, getLayoutVersion())) {
620622
setDeprecatedPropertiesForUpgrade(props);
621623
}
622624

625+
void readProperties(StorageDirectory sd, StartupOption startupOption)
626+
throws IOException {
627+
Properties props = readPropertiesFile(sd.getVersionFile());
628+
if (HdfsServerConstants.RollingUpgradeStartupOption.ROLLBACK.matches
629+
(startupOption)) {
630+
int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
631+
if (lv > getServiceLayoutVersion()) {
632+
// we should not use a newer version for rollingUpgrade rollback
633+
throw new IncorrectVersionException(getServiceLayoutVersion(), lv,
634+
"storage directory " + sd.getRoot().getAbsolutePath());
635+
}
636+
props.setProperty("layoutVersion",
637+
Integer.toString(HdfsConstants.NAMENODE_LAYOUT_VERSION));
638+
}
639+
setFieldsFromProperties(props, sd);
640+
}
641+
623642
/**
624643
* Pull any properties out of the VERSION file that are from older
625644
* versions of HDFS and only necessary during upgrade.
@@ -1002,8 +1021,8 @@ void inspectStorageDirs(FSImageStorageInspector inspector)
10021021
* <b>Note:</b> this can mutate the storage info fields (ctime, version, etc).
10031022
* @throws IOException if no valid storage dirs are found or no valid layout version
10041023
*/
1005-
FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
1006-
throws IOException {
1024+
FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes,
1025+
StartupOption startupOption) throws IOException {
10071026
Integer layoutVersion = null;
10081027
boolean multipleLV = false;
10091028
StringBuilder layoutVersions = new StringBuilder();
@@ -1016,7 +1035,7 @@ FSImageStorageInspector readAndInspectDirs(EnumSet<NameNodeFile> fileTypes)
10161035
FSImage.LOG.warn("Storage directory " + sd + " contains no VERSION file. Skipping...");
10171036
continue;
10181037
}
1019-
readProperties(sd); // sets layoutVersion
1038+
readProperties(sd, startupOption); // sets layoutVersion
10201039
int lv = getLayoutVersion();
10211040
if (layoutVersion == null) {
10221041
layoutVersion = Integer.valueOf(lv);

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ private void loadFSImageFromTempFile(File imageFile) throws IOException {
157157
fsn.writeLock();
158158
fsn.getFSDirectory().writeLock();
159159
try {
160-
loader.load(imageFile);
160+
loader.load(imageFile, false);
161161
FSImage.updateCountForQuota(
162162
INodeDirectory.valueOf(fsn.getFSDirectory().getINode("/"), "/"));
163163
} finally {

0 commit comments

Comments
 (0)