@@ -386,6 +386,8 @@ protected void recover() throws IOException {
386386 Map <Long , BlockRecord > syncBlocks = new HashMap <>(locs .length );
387387 final int dataBlkNum = ecPolicy .getNumDataUnits ();
388388 final int totalBlkNum = dataBlkNum + ecPolicy .getNumParityUnits ();
389+ int zeroLenReplicaCnt = 0 ;
390+ int dnNotHaveReplicaCnt = 0 ;
389391 //check generation stamps
390392 for (int i = 0 ; i < locs .length ; i ++) {
391393 DatanodeID id = locs [i ];
@@ -419,10 +421,14 @@ protected void recover() throws IOException {
419421 if (info == null ) {
420422 LOG .debug ("Block recovery: DataNode: {} does not have " +
421423 "replica for block: (block={}, internalBlk={})" , id , block , internalBlk );
424+ dnNotHaveReplicaCnt ++;
422425 } else {
423426 LOG .debug ("Block recovery: Ignored replica with invalid "
424427 + "generation stamp or length: {} from DataNode: {} by block: {}" ,
425428 info , id , block );
429+ if (info .getNumBytes () == 0 ) {
430+ zeroLenReplicaCnt ++;
431+ }
426432 }
427433 }
428434 } catch (RecoveryInProgressException ripE ) {
@@ -436,9 +442,18 @@ protected void recover() throws IOException {
436442 "datanode={})" , block , internalBlk , id , e );
437443 }
438444 }
439- checkLocations (syncBlocks .size ());
440445
441- final long safeLength = getSafeLength (syncBlocks );
446+ final long safeLength ;
447+ if (dnNotHaveReplicaCnt + zeroLenReplicaCnt <= locs .length - ecPolicy .getNumDataUnits ()) {
448+ checkLocations (syncBlocks .size ());
449+ safeLength = getSafeLength (syncBlocks );
450+ } else {
451+ safeLength = 0 ;
452+ LOG .warn ("Block recovery: {} datanodes do not have the replica of block {}." +
453+ " {} datanodes have zero-length replica. Will remove this block." ,
454+ dnNotHaveReplicaCnt , block , zeroLenReplicaCnt );
455+ }
456+
442457 LOG .debug ("Recovering block {}, length={}, safeLength={}, syncList={}" , block ,
443458 block .getNumBytes (), safeLength , syncBlocks );
444459
@@ -452,11 +467,13 @@ protected void recover() throws IOException {
452467 rurList .add (r );
453468 }
454469 }
455- assert rurList .size () >= dataBlkNum : "incorrect safe length" ;
456470
457- // Recovery the striped block by truncating internal blocks to the safe
458- // length. Abort if there is any failure in this step.
459- truncatePartialBlock (rurList , safeLength );
471+ if (safeLength > 0 ) {
472+ Preconditions .checkArgument (rurList .size () >= dataBlkNum , "incorrect safe length" );
473+ // Recover the striped block by truncating internal blocks to the safe
474+ // length. Abort if there is any failure in this step.
475+ truncatePartialBlock (rurList , safeLength );
476+ }
460477
461478 // notify Namenode the new size and locations
462479 final DatanodeID [] newLocs = new DatanodeID [totalBlkNum ];
@@ -469,11 +486,20 @@ protected void recover() throws IOException {
469486 int index = (int ) (r .rInfo .getBlockId () &
470487 HdfsServerConstants .BLOCK_GROUP_INDEX_MASK );
471488 newLocs [index ] = r .id ;
472- newStorages [index ] = r .storageID ;
489+ if (r .storageID != null ) {
490+ newStorages [index ] = r .storageID ;
491+ }
473492 }
474493 ExtendedBlock newBlock = new ExtendedBlock (bpid , block .getBlockId (),
475494 safeLength , recoveryId );
476495 DatanodeProtocolClientSideTranslatorPB nn = getActiveNamenodeForBP (bpid );
496+ if (safeLength == 0 ) {
497+ nn .commitBlockSynchronization (block , newBlock .getGenerationStamp (),
498+ newBlock .getNumBytes (), true , true , newLocs , newStorages );
499+ LOG .info ("After block recovery, the length of new block is 0. " +
500+ "Will remove this block: {} from file." , newBlock );
501+ return ;
502+ }
477503 nn .commitBlockSynchronization (block , newBlock .getGenerationStamp (),
478504 newBlock .getNumBytes (), true , false , newLocs , newStorages );
479505 }
@@ -527,8 +553,8 @@ long getSafeLength(Map<Long, BlockRecord> syncBlocks) {
/**
 * Verifies that enough internal blocks were collected to attempt recovery.
 *
 * @param locationCount number of usable internal-block locations found
 * @throws IOException if {@code locationCount} is smaller than
 *         {@code ecPolicy.getNumDataUnits()}; the message names the block,
 *         the current count (in the added lines) and the candidate locations
 */
527553 private void checkLocations (int locationCount )
528554 throws IOException {
529555 if (locationCount < ecPolicy .getNumDataUnits ()) {
530- throw new IOException (block + " has no enough internal blocks" +
531- ", unable to start recovery. Locations=" + Arrays .asList (locs ));
556+ throw new IOException (block + " has no enough internal blocks(current: " + locationCount +
557+ ") , unable to start recovery. Locations=" + Arrays .asList (locs ));
532558 }
533559 }
534560 }
0 commit comments