瀏覽代碼

HDFS-9435. TestBlockRecovery#testRBWReplicas is failing intermittently. Contributed by Rakesh R.

Walter Su 9 年之前
父節點
當前提交
001ecf962c

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -2356,6 +2356,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-9428. Fix intermittent failure of
     TestDNFencing.testQueueingWithAppend. (Masatake Iwasaki via waltersu4549)
 
+    HDFS-9435. TestBlockRecovery#testRBWReplicas is failing intermittently.
+    (Rakesh R via waltersu4549)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 28 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java

@@ -41,6 +41,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
@@ -90,6 +91,8 @@ import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
+import com.google.common.base.Supplier;
+
 /**
  * This tests if sync all replicas in block recovery works correctly
  */
@@ -181,10 +184,35 @@ public class TestBlockRecovery {
     };
     // Trigger a heartbeat so that it acknowledges the NN as active.
     dn.getAllBpOs().get(0).triggerHeartbeatForTests();
+    waitForActiveNN();
+
     spyDN = spy(dn);
     recoveryWorker = new BlockRecoveryWorker(spyDN);
   }
 
+  /**
+   * Waits up to 15 seconds, checking once per second, for the first block
+   * pool service of {@code dn} to report an active NN, and then asserts that
+   * an active NN is present.
+   *
+   * A timeout or an interrupt during the wait is only logged; the final
+   * assertion is what fails the test if no NN has been acknowledged as
+   * active.
+   */
+  private void waitForActiveNN() {
+    try {
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          return dn.getAllBpOs().get(0).getActiveNN() != null;
+        }
+      }, 1000, 15 * 1000);
+    } catch (TimeoutException e) {
+      // Do not fail here: the assertion below re-checks the active NN after
+      // the waiting period and fails there if the BPOS still has not
+      // acknowledged any NN as active.
+      LOG.warn("Failed to get active NN", e);
+    } catch (InterruptedException e) {
+      LOG.warn("InterruptedException while waiting to see active NN", e);
+      // Restore the interrupt status so that code running later on this
+      // thread can still observe the interruption instead of losing it.
+      Thread.currentThread().interrupt();
+    }
+    Assert.assertNotNull("Failed to get ActiveNN",
+        dn.getAllBpOs().get(0).getActiveNN());
+  }
+
   /**
    * Cleans the resources and closes the instance of datanode
    * @throws IOException if an error occurred