瀏覽代碼

HDFS-17704. Fix TestDecommission and TestDecommissionWithBackoffMonitor often run timeout. (#7266). Contributed by hfutatzhanghb.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
hfutatzhanghb 3 月之前
父節點
當前提交
053afb7063

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AdminStatesBaseTest.java

@@ -28,10 +28,12 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.util.Lists;
 import org.junit.Rule;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -69,6 +71,8 @@ public class AdminStatesBaseTest {
 
   @Rule
   public TemporaryFolder baseDir = new TemporaryFolder();
+  @Rule
+  public Timeout timeout = new Timeout(600, TimeUnit.SECONDS);
 
   private HostsFileWriter hostsFileWriter;
   private Configuration conf;

+ 26 - 25
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

@@ -188,7 +188,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Tests decommission for non federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommission() throws IOException {
     testDecommission(1, 6);
   }
@@ -198,7 +198,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * to other datanodes and satisfy the replication factor. Make sure the
    * datanode won't get stuck in decommissioning state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testDecommission2() throws IOException {
     LOG.info("Starting test testDecommission");
     int numNamenodes = 1;
@@ -247,7 +247,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Test decommission for federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionFederation() throws IOException {
     testDecommission(2, 2);
   }
@@ -262,7 +262,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * That creates inconsistent state and prevent SBN from finishing
    * decommission.
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionOnStandby() throws Exception {
     getConf().setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
     getConf().setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
@@ -435,7 +435,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Test that over-replicated blocks are deleted on recommission.
    */
-  @Test(timeout=120000)
+  @Test
   public void testRecommission() throws Exception {
     final int numDatanodes = 6;
     try {
@@ -516,7 +516,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * Tests cluster storage statistics during decommissioning for non
    * federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testClusterStats() throws Exception {
     testClusterStats(1);
   }
@@ -525,7 +525,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * Tests cluster storage statistics during decommissioning for
    * federated cluster
    */
-  @Test(timeout=360000)
+  @Test
   public void testClusterStatsFederation() throws Exception {
     testClusterStats(3);
   }
@@ -575,7 +575,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * in the include file are allowed to connect to the namenode in a non
    * federated cluster.
    */
-  @Test(timeout=360000)
+  @Test
   public void testHostsFile() throws IOException, InterruptedException {
     // Test for a single namenode cluster
     testHostsFile(1);
@@ -586,7 +586,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * in the include file are allowed to connect to the namenode in a 
    * federated cluster.
    */
-  @Test(timeout=360000)
+  @Test
   public void testHostsFileFederation()
       throws IOException, InterruptedException {
     // Test for 3 namenode federated cluster
@@ -624,7 +624,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     }
   }
   
-  @Test(timeout=120000)
+  @Test
   public void testDecommissionWithOpenfile()
       throws IOException, InterruptedException {
     LOG.info("Starting test testDecommissionWithOpenfile");
@@ -676,7 +676,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     fdos.close();
   }
 
-  @Test(timeout = 20000)
+  @Test
   public void testDecommissionWithUnknownBlock() throws IOException {
     startCluster(1, 3);
 
@@ -795,7 +795,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     }
   }
 
-  @Test(timeout=180000)
+  @Test
   public void testDecommissionWithOpenfileReporting()
       throws Exception {
     LOG.info("Starting test testDecommissionWithOpenfileReporting");
@@ -901,7 +901,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * 2. close file with decommissioning
    * @throws Exception
    */
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionWithCloseFileAndListOpenFiles()
       throws Exception {
     LOG.info("Starting test testDecommissionWithCloseFileAndListOpenFiles");
@@ -958,7 +958,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     fileSys.delete(file, false);
   }
 
-  @Test(timeout = 360000)
+  @Test
   public void testDecommissionWithOpenFileAndBlockRecovery()
       throws IOException, InterruptedException {
     startCluster(1, 6);
@@ -1005,7 +1005,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     assertEquals(dfs.getFileStatus(file).getLen(), writtenBytes);
   }
 
-  @Test(timeout=120000)
+  @Test
   public void testCloseWhileDecommission() throws IOException,
       ExecutionException, InterruptedException {
     LOG.info("Starting test testCloseWhileDecommission");
@@ -1064,7 +1064,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * to the IBR, all three nodes dn1/dn2/dn3 enter Decommissioning and then the
    * DN reports the IBR.
    */
-  @Test(timeout=120000)
+  @Test
   public void testAllocAndIBRWhileDecommission() throws IOException {
     LOG.info("Starting test testAllocAndIBRWhileDecommission");
     getConf().setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
@@ -1149,7 +1149,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Tests restart of namenode while datanode hosts are added to exclude file
    **/
-  @Test(timeout=360000)
+  @Test
   public void testDecommissionWithNamenodeRestart()
       throws IOException, InterruptedException {
     LOG.info("Starting test testDecommissionWithNamenodeRestart");
@@ -1201,7 +1201,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Tests dead node count after restart of namenode
    **/
-  @Test(timeout=360000)
+  @Test
   public void testDeadNodeCountAfterNamenodeRestart()throws Exception {
     LOG.info("Starting test testDeadNodeCountAfterNamenodeRestart");
     int numNamenodes = 1;
@@ -1248,7 +1248,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * valid DNS hostname for the DataNode.  See HDFS-5237 for background.
    */
   @Ignore
-  @Test(timeout=360000)
+  @Test
   public void testIncludeByRegistrationName() throws Exception {
     // Any IPv4 address starting with 127 functions as a "loopback" address
     // which is connected to the current host.  So by choosing 127.0.0.100
@@ -1314,7 +1314,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     }, 500, 5000);
   }
   
-  @Test(timeout=120000)
+  @Test
   public void testBlocksPerInterval() throws Exception {
     GenericTestUtils.setLogLevel(
         LoggerFactory.getLogger(DatanodeAdminManager.class), Level.TRACE);
@@ -1369,7 +1369,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Test DatanodeAdminManager#monitor can swallow any exceptions by default.
    */
-  @Test(timeout=120000)
+  @Test
   public void testPendingNodeButDecommissioned() throws Exception {
     // Only allow one node to be decom'd at a time
     getConf().setInt(
@@ -1416,7 +1416,7 @@ public class TestDecommission extends AdminStatesBaseTest {
     }
   }
 
-  @Test(timeout=120000)
+  @Test
   public void testPendingNodes() throws Exception {
     GenericTestUtils.setLogLevel(
         LoggerFactory.getLogger(DatanodeAdminManager.class), Level.TRACE);
@@ -1639,7 +1639,7 @@ public class TestDecommission extends AdminStatesBaseTest {
   /**
    * Verify if multiple DataNodes can be decommissioned at the same time.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testMultipleNodesDecommission() throws Exception {
     startCluster(1, 5);
     final Path file = new Path("/testMultipleNodesDecommission.dat");
@@ -1685,7 +1685,7 @@ public class TestDecommission extends AdminStatesBaseTest {
    * Force the tracked nodes set to be filled with nodes lost while decommissioning,
    * then decommission healthy nodes & validate they are decommissioned eventually.
    */
-  @Test(timeout = 120000)
+  @Test
   public void testRequeueUnhealthyDecommissioningNodes() throws Exception {
     // Create a MiniDFSCluster with 3 live datanode in AdminState=NORMAL and
     // 2 dead datanodes in AdminState=DECOMMISSION_INPROGRESS and a file
@@ -1911,7 +1911,8 @@ public class TestDecommission extends AdminStatesBaseTest {
   under-replicated block can be replicated to sufficient datanodes & the decommissioning
   node can be decommissioned.
    */
-  @Test(timeout = 60000)
+  @SuppressWarnings("checkstyle:methodlength")
+  @Test
   public void testDeleteCorruptReplicaForUnderReplicatedBlock() throws Exception {
     // Constants
     final Path file = new Path("/test-file");

+ 25 - 25
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMaintenanceState.java

@@ -87,7 +87,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Test valid value range for the config namenode.maintenance.replication.min.
    */
-  @Test (timeout = 60000)
+  @Test
   public void testMaintenanceMinReplConfigRange() {
     LOG.info("Setting testMaintenanceMinReplConfigRange");
 
@@ -120,7 +120,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * Verify a node can transition from AdminStates.ENTERING_MAINTENANCE to
    * AdminStates.NORMAL.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTakeNodeOutOfEnteringMaintenance() throws Exception {
     LOG.info("Starting testTakeNodeOutOfEnteringMaintenance");
     final int replicas = 1;
@@ -149,7 +149,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * Verify an AdminStates.ENTERING_MAINTENANCE node can expire and transition
    * to AdminStates.NORMAL upon timeout.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testEnteringMaintenanceExpiration() throws Exception {
     LOG.info("Starting testEnteringMaintenanceExpiration");
     final int replicas = 1;
@@ -173,7 +173,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Verify node stays in AdminStates.NORMAL with invalid expiration.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testInvalidExpiration() throws Exception {
     LOG.info("Starting testInvalidExpiration");
     final int replicas = 1;
@@ -195,7 +195,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * When a dead node is put to maintenance, it transitions directly to
    * AdminStates.IN_MAINTENANCE.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testPutDeadNodeToMaintenance() throws Exception {
     LOG.info("Starting testPutDeadNodeToMaintenance");
     final int replicas = 1;
@@ -229,7 +229,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * AdminStates.IN_MAINTENANCE. Then AdminStates.IN_MAINTENANCE expires and
    * transitions to AdminStates.NORMAL.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testPutDeadNodeToMaintenanceWithExpiration() throws Exception {
     LOG.info("Starting testPutDeadNodeToMaintenanceWithExpiration");
     final Path file =
@@ -266,7 +266,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Transition from decommissioned state to maintenance state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionFromDecommissioned() throws IOException {
     LOG.info("Starting testTransitionFromDecommissioned");
     final Path file = new Path("/testTransitionFromDecommissioned.dat");
@@ -289,7 +289,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * Transition from decommissioned state to maintenance state.
    * After the maintenance state expires, it is transitioned to NORMAL.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionFromDecommissionedAndExpired() throws IOException {
     LOG.info("Starting testTransitionFromDecommissionedAndExpired");
     final Path file =
@@ -320,7 +320,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * If node becomes dead when it is in AdminStates.ENTERING_MAINTENANCE, it
    * should stay in AdminStates.ENTERING_MAINTENANCE state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testNodeDeadWhenInEnteringMaintenance() throws Exception {
     LOG.info("Starting testNodeDeadWhenInEnteringMaintenance");
     final int numNamenodes = 1;
@@ -366,7 +366,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY and
    * its file's replication factor into account.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testExpectedReplications() throws IOException {
     LOG.info("Starting testExpectedReplications");
     testExpectedReplication(1);
@@ -411,7 +411,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * Verify a node can transition directly to AdminStates.IN_MAINTENANCE when
    * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY is set to zero.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testZeroMinMaintenanceReplication() throws Exception {
     LOG.info("Starting testZeroMinMaintenanceReplication");
     setMinMaintenanceR(0);
@@ -434,7 +434,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY is set to zero. Then later
    * transition to NORMAL after maintenance expiration.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testZeroMinMaintenanceReplicationWithExpiration()
       throws Exception {
     LOG.info("Starting testZeroMinMaintenanceReplicationWithExpiration");
@@ -460,7 +460,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Test file block replication less than the maintenance minimum.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testFileBlockReplicationAffectingMaintenance()
       throws Exception {
     int defaultReplication = getConf().getInt(DFSConfigKeys
@@ -537,7 +537,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Transition from IN_MAINTENANCE to DECOMMISSIONED.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionToDecommission() throws IOException {
     LOG.info("Starting testTransitionToDecommission");
     final int numNamenodes = 1;
@@ -581,7 +581,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Transition from decommissioning state to maintenance state.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTransitionFromDecommissioning() throws IOException {
     LOG.info("Starting testTransitionFromDecommissioning");
     startCluster(1, 3);
@@ -612,7 +612,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * in decommission. Make sure decommission process take
    * maintenance replica into account.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testDecommissionDifferentNodeAfterMaintenances()
       throws Exception {
     testDecommissionDifferentNodeAfterMaintenance(2);
@@ -657,7 +657,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * Verify if multiple DataNodes can transition to maintenance state
    * at the same time.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testMultipleNodesMaintenance() throws Exception {
     startCluster(1, 5);
     final Path file = new Path("/testMultipleNodesMaintenance.dat");
@@ -691,7 +691,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     cleanupFile(fileSys, file);
   }
 
-  @Test(timeout = 360000)
+  @Test
   public void testChangeReplicationFactors() throws IOException {
     // Prior to any change, there is 1 maintenance node and 2 live nodes.
 
@@ -762,7 +762,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * c. Take the node out of maintenance => NN should schedule the replication
    *    and end up with 3 live.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testTakeDeadNodeOutOfMaintenance() throws Exception {
     LOG.info("Starting testTakeDeadNodeOutOfMaintenance");
     final int numNamenodes = 1;
@@ -818,7 +818,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * d. Restart the maintenance dn => 1 maintenance, 3 live.
    * e. Take the node out of maintenance => over replication => 3 live.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testWithNNAndDNRestart() throws Exception {
     LOG.info("Starting testWithNNAndDNRestart");
     final int numNamenodes = 1;
@@ -876,7 +876,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Machine under maintenance state won't be chosen for new block allocation.
    */
-  @Test(timeout = 3600000)
+  @Test
   public void testWriteAfterMaintenance() throws IOException {
     LOG.info("Starting testWriteAfterMaintenance");
     startCluster(1, 3);
@@ -908,7 +908,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
    * Given there are minReplication replicas somewhere else,
    * it can be transitioned to AdminStates.IN_MAINTENANCE.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testEnterMaintenanceWhenFileOpen() throws Exception {
     LOG.info("Starting testEnterMaintenanceWhenFileOpen");
     startCluster(1, 3);
@@ -927,7 +927,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
   /**
    * Machine under maintenance state won't be chosen for invalidation.
    */
-  @Test(timeout = 360000)
+  @Test
   public void testInvalidation() throws IOException {
     LOG.info("Starting testInvalidation");
     int numNamenodes = 1;
@@ -959,7 +959,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     cleanupFile(fileSys, file);
   }
 
-  @Test(timeout = 120000)
+  @Test
   public void testFileCloseAfterEnteringMaintenance() throws Exception {
     LOG.info("Starting testFileCloseAfterEnteringMaintenance");
     int expirationInMs = 30 * 1000;
@@ -1138,7 +1138,7 @@ public class TestMaintenanceState extends AdminStatesBaseTest {
     }
   }
 
-  @Test(timeout = 120000)
+  @Test
   public void testReportMaintenanceNodes() throws Exception {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     ByteArrayOutputStream err = new ByteArrayOutputStream();