Selaa lähdekoodia

HDFS-12607. [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED

Virajith Jalaparti 7 vuotta sitten
vanhempi
commit
71d0a82571

+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java

@@ -455,8 +455,10 @@ public class DatanodeDescriptor extends DatanodeInfo {
       totalDfsUsed += report.getDfsUsed();
       totalNonDfsUsed += report.getNonDfsUsed();
 
-      if (StorageType.PROVIDED.equals(
-          report.getStorage().getStorageType())) {
+      // for PROVIDED storages, do not call updateStorage() unless
+      // DatanodeStorageInfo already exists!
+      if (StorageType.PROVIDED.equals(report.getStorage().getStorageType())
+          && storageMap.get(report.getStorage().getStorageID()) == null) {
         continue;
       }
       DatanodeStorageInfo storage = updateStorage(report.getStorage());

+ 27 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ProvidedStorageMap.java

@@ -66,7 +66,6 @@ public class ProvidedStorageMap {
   // limit to a single provider for now
   private RwLock lock;
   private BlockManager bm;
-  private boolean hasDNs = false;
   private BlockAliasMap aliasMap;
 
   private final String storageId;
@@ -123,6 +122,11 @@ public class ProvidedStorageMap {
       BlockReportContext context) throws IOException {
     if (providedEnabled && storageId.equals(s.getStorageID())) {
       if (StorageType.PROVIDED.equals(s.getStorageType())) {
+        if (providedStorageInfo.getState() == State.FAILED
+            && s.getState() == State.NORMAL) {
+          providedStorageInfo.setState(State.NORMAL);
+          LOG.info("Provided storage transitioning to state " + State.NORMAL);
+        }
         processProvidedStorageReport(context);
         dn.injectStorage(providedStorageInfo);
         return providedDescriptor.getProvidedStorage(dn, s);
@@ -135,21 +139,14 @@ public class ProvidedStorageMap {
   private void processProvidedStorageReport(BlockReportContext context)
       throws IOException {
     assert lock.hasWriteLock() : "Not holding write lock";
-    if (hasDNs) {
-      return;
-    }
-    if (providedStorageInfo.getBlockReportCount() == 0) {
+    if (providedStorageInfo.getBlockReportCount() == 0
+        || providedDescriptor.activeProvidedDatanodes() == 0) {
       LOG.info("Calling process first blk report from storage: "
           + providedStorageInfo);
       // first pass; periodic refresh should call bm.processReport
       bm.processFirstBlockReport(providedStorageInfo,
           new ProvidedBlockList(aliasMap.getReader(null).iterator()));
-    } else {
-      bm.processReport(providedStorageInfo,
-          new ProvidedBlockList(aliasMap.getReader(null).iterator()),
-          context);
     }
-    hasDNs = true;
   }
 
   @VisibleForTesting
@@ -167,9 +164,10 @@ public class ProvidedStorageMap {
   public void removeDatanode(DatanodeDescriptor dnToRemove) {
     if (providedEnabled) {
       assert lock.hasWriteLock() : "Not holding write lock";
-      int remainingDatanodes = providedDescriptor.remove(dnToRemove);
-      if (remainingDatanodes == 0) {
-        hasDNs = false;
+      providedDescriptor.remove(dnToRemove);
+      // all datanodes gone: reset count so the next report is a first report
+      if (providedDescriptor.activeProvidedDatanodes() == 0) {
+        providedStorageInfo.setBlockReportCount(0);
       }
     }
   }
@@ -466,6 +464,22 @@ public class ProvidedStorageMap {
         return false;
       }
     }
+
+    @Override
+    void setState(DatanodeStorage.State state) {
+      if (state == State.FAILED) {
+        // The state should change to FAILED only when there are no active
+        // datanodes with PROVIDED storage.
+        ProvidedDescriptor dn = (ProvidedDescriptor) getDatanodeDescriptor();
+        if (dn.activeProvidedDatanodes() == 0) {
+          LOG.info("Provided storage {} transitioning to state {}",
+              this, State.FAILED);
+          super.setState(state);
+        }
+      } else {
+        super.setState(state);
+      }
+    }
   }
   /**
    * Used to emulate block reports for provided blocks.

+ 40 - 0
hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeProvidedImplementation.java

@@ -492,4 +492,44 @@ public class TestNameNodeProvidedImplementation {
           dnInfos[0].getXferAddr());
     }
   }
+
+  @Test(timeout=300000)
+  public void testTransientDeadDatanodes() throws Exception {
+    createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
+            FixedBlockResolver.class);
+    // 2 Datanodes, 1 PROVIDED and other DISK
+    startCluster(NNDIRPATH, 2, null,
+        new StorageType[][] {
+            {StorageType.PROVIDED},
+            {StorageType.DISK}},
+        false);
+
+    DataNode providedDatanode = cluster.getDataNodes().get(0);
+
+    DFSClient client = new DFSClient(new InetSocketAddress("localhost",
+            cluster.getNameNodePort()), cluster.getConfiguration(0));
+
+    for (int i= 0; i < numFiles; i++) {
+      String filename = "/" + filePrefix + i + fileSuffix;
+
+      DatanodeInfo[] dnInfos = getAndCheckBlockLocations(client, filename, 1);
+      // location should be the provided DN.
+      assertTrue(dnInfos[0].getDatanodeUuid()
+          .equals(providedDatanode.getDatanodeUuid()));
+
+      // NameNode thinks the datanode is down
+      BlockManagerTestUtil.noticeDeadDatanode(
+          cluster.getNameNode(),
+          providedDatanode.getDatanodeId().getXferAddr());
+      cluster.waitActive();
+      cluster.triggerHeartbeats();
+      Thread.sleep(1000);
+
+      // the block should again be located on the same provided datanode.
+      dnInfos = getAndCheckBlockLocations(client, filename, 1);
+      assertTrue(
+          dnInfos[0].getDatanodeUuid()
+              .equals(providedDatanode.getDatanodeUuid()));
+    }
+  }
 }