
HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P. McCabe)

(cherry picked from commit 03fb5c642589dec4e663479771d0ae1782038b63)
(cherry picked from commit 17e369511dd18d6e25e10ddb0cd9511f7ec8f469)
Colin Patrick McCabe committed 10 years ago
parent
commit 323ffccfe9
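In brief, the patch has two parts: DataNode startup is reordered so the directory scanner is only initialized after the block pool has been added to the dataset, and BlockPoolSlice's duplicate-replica resolution is split out and guarded so that two replica records pointing at the same block file are never "resolved" by deleting that file. Illustrative sketches follow each file's diff below.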

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -93,6 +93,8 @@ Release 2.7.1 - UNRELEASED
     HDFS-5215. dfs.datanode.du.reserved is not considered while computing
     available space ( Brahma Reddy Battula via Yongjun Zhang)
 
+    HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P.
+    McCabe)
 
 Release 2.7.0 - 2015-04-20
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -1356,9 +1356,9 @@ public class DataNode extends ReconfigurableBase
     // failures.
     checkDiskError();
 
-    initDirectoryScanner(conf);
     data.addBlockPool(nsInfo.getBlockPoolID(), conf);
     blockScanner.enableBlockPoolId(bpos.getBlockPoolId());
+    initDirectoryScanner(conf);
   }
 
   BPOfferService[] getAllBpOs() {
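
The one-line move above is the startup-ordering half of the fix: initDirectoryScanner(conf) now runs only after addBlockPool and enableBlockPoolId. One plausible reading of why the order matters, as a runnable toy model (hypothetical names throughout; this is not HDFS code): a scanner that runs before the pool is registered treats the pool's replicas as unknown and would reclaim them.

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class StartupOrderDemo {
  // Stands in for the dataset's knowledge of registered block pools.
  static final Set<String> knownPools = ConcurrentHashMap.newKeySet();

  // Stands in for a directory-scanner pass over one pool's storage.
  static void scanDirectory(String pool) {
    if (!knownPools.contains(pool)) {
      System.out.println("scanner: " + pool + " unknown -> would reclaim its replicas");
    } else {
      System.out.println("scanner: " + pool + " known -> replicas reconciled normally");
    }
  }

  public static void main(String[] args) {
    scanDirectory("BP-1");   // scanner before registration: replicas at risk
    knownPools.add("BP-1");  // corresponds to data.addBlockPool(...)
    scanDirectory("BP-1");   // scanner after registration: safe
  }
}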

+ 22 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java

@@ -533,10 +533,28 @@ class BlockPoolSlice {
       // Leave both block replicas in place.
       return replica1;
     }
+    final ReplicaInfo replicaToDelete =
+        selectReplicaToDelete(replica1, replica2);
+    final ReplicaInfo replicaToKeep =
+        (replicaToDelete != replica1) ? replica1 : replica2;
+    // Update volumeMap and delete the replica
+    volumeMap.add(bpid, replicaToKeep);
+    if (replicaToDelete != null) {
+      deleteReplica(replicaToDelete);
+    }
+    return replicaToKeep;
+  }
 
+  static ReplicaInfo selectReplicaToDelete(final ReplicaInfo replica1,
+      final ReplicaInfo replica2) {
     ReplicaInfo replicaToKeep;
     ReplicaInfo replicaToDelete;
 
+    // it's the same block so don't ever delete it, even if GS or size
+    // differs.  caller should keep the one it just discovered on disk
+    if (replica1.getBlockFile().equals(replica2.getBlockFile())) {
+      return null;
+    }
     if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) {
       replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp()
           ? replica1 : replica2;
@@ -556,10 +574,10 @@ class BlockPoolSlice {
       LOG.debug("resolveDuplicateReplicas decide to keep " + replicaToKeep
           + ".  Will try to delete " + replicaToDelete);
     }
+    return replicaToDelete;
+  }
 
-    // Update volumeMap.
-    volumeMap.add(bpid, replicaToKeep);
-
+  private void deleteReplica(final ReplicaInfo replicaToDelete) {
     // Delete the files on disk. Failure here is okay.
     final File blockFile = replicaToDelete.getBlockFile();
     if (!blockFile.delete()) {
@@ -569,10 +587,8 @@ class BlockPoolSlice {
     if (!metaFile.delete()) {
       LOG.warn("Failed to delete meta file " + metaFile);
     }
-
-    return replicaToKeep;
   }
-  
+
   /**
    * Find out the number of bytes in the block that match its crc.
    * 
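
The heart of the fix is the new guard at the top of selectReplicaToDelete: if both records point at the same block file, return null and delete nothing. The comparison relies on java.io.File equality, which compares abstract pathnames rather than on-disk files. A self-contained snippet illustrating the property the guard relies on (paths are made up):

import java.io.File;

public class PathEqualityDemo {
  public static void main(String[] args) {
    // java.io.File#equals compares abstract pathnames, so two handles to
    // the same path are equal even though no file exists on disk ...
    System.out.println(new File("d1/block").equals(new File("d1/block"))); // true
    // ... while handles to distinct paths are not.
    System.out.println(new File("d1/block").equals(new File("d2/block"))); // false
  }
}

Returning null in that case, rather than picking a winner, is what keeps the resolver from deleting the only copy of a block when the same replica is discovered twice under one path, which is the data-loss scenario in the commit message.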

+ 34 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java

@@ -50,6 +50,7 @@ import org.apache.hadoop.util.StringUtils;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Matchers;
+import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -66,6 +67,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOUR
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyList;
@@ -398,4 +401,35 @@ public class TestFsDatasetImpl {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void testDuplicateReplicaResolution() throws IOException {
+    FsVolumeImpl fsv1 = Mockito.mock(FsVolumeImpl.class);
+    FsVolumeImpl fsv2 = Mockito.mock(FsVolumeImpl.class);
+
+    File f1 = new File("d1/block");
+    File f2 = new File("d2/block");
+
+    ReplicaInfo replicaOlder = new FinalizedReplica(1,1,1,fsv1,f1);
+    ReplicaInfo replica = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaSame = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaNewer = new FinalizedReplica(1,3,3,fsv1,f1);
+
+    ReplicaInfo replicaOtherOlder = new FinalizedReplica(1,1,1,fsv2,f2);
+    ReplicaInfo replicaOtherSame = new FinalizedReplica(1,2,2,fsv2,f2);
+    ReplicaInfo replicaOtherNewer = new FinalizedReplica(1,3,3,fsv2,f2);
+
+    // equivalent path so don't remove either
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaSame, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaOlder, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaNewer, replica));
+
+    // keep latest found replica
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherSame, replica));
+    assertSame(replicaOtherOlder,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherOlder, replica));
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherNewer, replica));
+  }
 }
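
A note on the design: because selectReplicaToDelete is now a static decision function with no dataset side effects, the test can exercise it with mocked volumes and plain File paths, with no dataset, volumeMap, or disk involved. The decision table the assertions encode, summarized (arguments (r1, r2); the return value is the replica to delete):

// r1 and r2 share a block file path -> null (delete nothing)
// generation stamps differ          -> the lower-GS replica is deleted
// full tie on GS and length         -> r2 is deleted; per the test's
//                                      comment, r1 (the replica most
//                                      recently found on disk) is kept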