
HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P. McCabe)

(cherry picked from commit 03fb5c642589dec4e663479771d0ae1782038b63)
Colin Patrick McCabe, 10 years ago
parent commit 17e369511d

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -587,6 +587,9 @@ Release 2.7.1 - UNRELEASED
     HDFS-8451. DFSClient probe for encryption testing interprets empty URI
     property for "enabled". (Steve Loughran via xyao)
 
+    HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P.
+    McCabe)
+
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -1377,9 +1377,9 @@ public class DataNode extends ReconfigurableBase
     // failures.
     checkDiskError();
 
-    initDirectoryScanner(conf);
     data.addBlockPool(nsInfo.getBlockPoolID(), conf);
     blockScanner.enableBlockPoolId(bpos.getBlockPoolId());
+    initDirectoryScanner(conf);
   }
 
   List<BPOfferService> getAllBpOs() {
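Note on the one-line reorder above: initDirectoryScanner(conf) now runs only after data.addBlockPool(...) and blockScanner.enableBlockPoolId(...). If the directory scanner starts before the block pool's on-disk replicas are loaded into the replica map, it reconciles the disk contents against an incomplete map; starting it last closes that window. A minimal sketch of the ordering, using hypothetical stand-in types rather than the real DataNode collaborators:

    interface ReplicaStore { void addBlockPool(String bpid); }
    interface DirectoryScanner { void start(); }

    class StartupOrderSketch {
      // Load the block pool's on-disk replicas into the volume map first,
      // and only then let the scanner reconcile disk state against the map.
      void initBlockPool(String bpid, ReplicaStore data, DirectoryScanner scanner) {
        data.addBlockPool(bpid);
        scanner.start();
      }
    }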

+ 22 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java

@@ -556,10 +556,28 @@ class BlockPoolSlice {
       // Leave both block replicas in place.
       return replica1;
     }
+    final ReplicaInfo replicaToDelete =
+        selectReplicaToDelete(replica1, replica2);
+    final ReplicaInfo replicaToKeep =
+        (replicaToDelete != replica1) ? replica1 : replica2;
+    // Update volumeMap and delete the replica
+    volumeMap.add(bpid, replicaToKeep);
+    if (replicaToDelete != null) {
+      deleteReplica(replicaToDelete);
+    }
+    return replicaToKeep;
+  }
 
+  static ReplicaInfo selectReplicaToDelete(final ReplicaInfo replica1,
+      final ReplicaInfo replica2) {
     ReplicaInfo replicaToKeep;
     ReplicaInfo replicaToDelete;
 
+    // it's the same block so don't ever delete it, even if GS or size
+    // differs.  caller should keep the one it just discovered on disk
+    if (replica1.getBlockFile().equals(replica2.getBlockFile())) {
+      return null;
+    }
     if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) {
       replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp()
           ? replica1 : replica2;
@@ -579,10 +597,10 @@ class BlockPoolSlice {
       LOG.debug("resolveDuplicateReplicas decide to keep " + replicaToKeep
           + ".  Will try to delete " + replicaToDelete);
     }
+    return replicaToDelete;
+  }
 
-    // Update volumeMap.
-    volumeMap.add(bpid, replicaToKeep);
-
+  private void deleteReplica(final ReplicaInfo replicaToDelete) {
     // Delete the files on disk. Failure here is okay.
     final File blockFile = replicaToDelete.getBlockFile();
     if (!blockFile.delete()) {
@@ -592,10 +610,8 @@ class BlockPoolSlice {
     if (!metaFile.delete()) {
       LOG.warn("Failed to delete meta file " + metaFile);
     }
-
-    return replicaToKeep;
   }
-  
+
   /**
    * Find out the number of bytes in the block that match its crc.
    * 
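The core of the data-loss fix is the same-file guard at the top of selectReplicaToDelete: when both "duplicate" replicas resolve to the same block file on disk, deleting the losing replica would delete the only copy of the block, so the method must return null before any tie-breaking. A minimal standalone sketch of that guard, using java.io.File directly instead of the real ReplicaInfo type (the real method also breaks generation-stamp ties on replica length; this sketch keeps only the stamp comparison):

    import java.io.File;

    class DuplicateGuardSketch {
      // Returns the file to delete, or null when both replicas point at the
      // same on-disk file and nothing may be removed.
      static File selectFileToDelete(File file1, long genStamp1,
                                     File file2, long genStamp2) {
        if (file1.equals(file2)) {
          return null;  // same underlying block file: never delete it
        }
        // Otherwise keep the replica with the higher generation stamp.
        return genStamp1 > genStamp2 ? file2 : file1;
      }
    }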

+ 34 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java

@@ -51,6 +51,7 @@ import org.apache.hadoop.util.StringUtils;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Matchers;
+import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
@@ -66,6 +67,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOUR
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.fail;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyListOf;
@@ -411,4 +414,35 @@ public class TestFsDatasetImpl {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void testDuplicateReplicaResolution() throws IOException {
+    FsVolumeImpl fsv1 = Mockito.mock(FsVolumeImpl.class);
+    FsVolumeImpl fsv2 = Mockito.mock(FsVolumeImpl.class);
+
+    File f1 = new File("d1/block");
+    File f2 = new File("d2/block");
+
+    ReplicaInfo replicaOlder = new FinalizedReplica(1,1,1,fsv1,f1);
+    ReplicaInfo replica = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaSame = new FinalizedReplica(1,2,2,fsv1,f1);
+    ReplicaInfo replicaNewer = new FinalizedReplica(1,3,3,fsv1,f1);
+
+    ReplicaInfo replicaOtherOlder = new FinalizedReplica(1,1,1,fsv2,f2);
+    ReplicaInfo replicaOtherSame = new FinalizedReplica(1,2,2,fsv2,f2);
+    ReplicaInfo replicaOtherNewer = new FinalizedReplica(1,3,3,fsv2,f2);
+
+    // equivalent path so don't remove either
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaSame, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaOlder, replica));
+    assertNull(BlockPoolSlice.selectReplicaToDelete(replicaNewer, replica));
+
+    // keep latest found replica
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherSame, replica));
+    assertSame(replicaOtherOlder,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherOlder, replica));
+    assertSame(replica,
+        BlockPoolSlice.selectReplicaToDelete(replicaOtherNewer, replica));
+  }
 }
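The new test calls the now package-visible static BlockPoolSlice.selectReplicaToDelete directly with mocked volumes, so it needs no mini-cluster. Assuming a checkout of the Hadoop source tree with a standard Maven setup, it should be runnable on its own with something like:

    mvn -pl hadoop-hdfs-project/hadoop-hdfs test -Dtest=TestFsDatasetImpl#testDuplicateReplicaResolution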