Przeglądaj źródła

HDFS-17117. Print reconstructionQueuesInitProgress periodically while BlockManager runs processMisReplicatesAsync. (#5877). Contributed by Haiyang Hu.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
huhaiyang 1 rok temu
rodzic
commit
1d09dcc614

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java

@@ -884,6 +884,11 @@ public class NamenodeBeanMetrics
     return 0;
     return 0;
   }
   }
 
 
+  @Override
+  public float getReconstructionQueuesInitProgress() {
+    return 0;
+  }
+
   private Router getRouter() throws IOException {
   private Router getRouter() throws IOException {
     if (this.router == null) {
     if (this.router == null) {
       throw new IOException("Router is not initialized");
       throw new IOException("Router is not initialized");

+ 8 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -449,7 +449,7 @@ public class BlockManager implements BlockStatsMXBean {
   /**
   /**
    * Progress of the Reconstruction queues initialisation.
    * Progress of the Reconstruction queues initialisation.
    */
    */
-  private double reconstructionQueuesInitProgress = 0.0;
+  private float reconstructionQueuesInitProgress = 0.0f;
 
 
   /** for block replicas placement */
   /** for block replicas placement */
   private volatile BlockPlacementPolicies placementPolicies;
   private volatile BlockPlacementPolicies placementPolicies;
@@ -3889,8 +3889,10 @@ public class BlockManager implements BlockStatsMXBean {
         totalProcessed += processed;
         totalProcessed += processed;
         // there is a possibility that if any of the blocks deleted/added during
         // there is a possibility that if any of the blocks deleted/added during
         // initialisation, then progress might be different.
         // initialisation, then progress might be different.
-        reconstructionQueuesInitProgress = Math.min((double) totalProcessed
-            / totalBlocks, 1.0);
+        if (totalBlocks > 0) { // Skip the update when there are no blocks so the metric never appears as NaN.
+          reconstructionQueuesInitProgress = Math.min((float) totalProcessed
+              / totalBlocks, 1.0f);
+        }
 
 
         if (!blocksItr.hasNext()) {
         if (!blocksItr.hasNext()) {
           LOG.info("Total number of blocks            = {}", blocksMap.size());
           LOG.info("Total number of blocks            = {}", blocksMap.size());
@@ -3910,6 +3912,8 @@ public class BlockManager implements BlockStatsMXBean {
         }
         }
       } finally {
       } finally {
         namesystem.writeUnlock("processMisReplicatesAsync");
         namesystem.writeUnlock("processMisReplicatesAsync");
+        LOG.info("Reconstruction queues initialisation progress: {}, total number of blocks " +
+            "processed: {}/{}", reconstructionQueuesInitProgress, totalProcessed, totalBlocks);
         // Make sure it is out of the write lock for sufficiently long time.
         // Make sure it is out of the write lock for sufficiently long time.
         Thread.sleep(sleepDuration);
         Thread.sleep(sleepDuration);
       }
       }
@@ -3924,7 +3928,7 @@ public class BlockManager implements BlockStatsMXBean {
    * 
    * 
    * @return Returns values between 0 and 1 for the progress.
    * @return Returns values between 0 and 1 for the progress.
    */
    */
-  public double getReconstructionQueuesInitProgress() {
+  public float getReconstructionQueuesInitProgress() {
     return reconstructionQueuesInitProgress;
     return reconstructionQueuesInitProgress;
   }
   }
 
 

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -4888,6 +4888,15 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return blockManager.getPendingSPSPaths();
     return blockManager.getPendingSPSPaths();
   }
   }
 
 
+  /**
+   * Get the progress of the reconstruction queues initialisation.
+   */
+  @Override // FSNamesystemMBean
+  @Metric
+  public float getReconstructionQueuesInitProgress() {
+    return blockManager.getReconstructionQueuesInitProgress();
+  }
+
   /**
   /**
    * Returns the length of the wait Queue for the FSNameSystemLock.
    * Returns the length of the wait Queue for the FSNameSystemLock.
    *
    *

+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java

@@ -261,4 +261,11 @@ public interface FSNamesystemMBean {
    * @return The number of paths to be processed by sps.
    * @return The number of paths to be processed by sps.
    */
    */
   int getPendingSPSPaths();
   int getPendingSPSPaths();
+
+  /**
+   * Get the progress of the reconstruction queues initialisation.
+   *
+   * @return Returns values between 0 and 1 for the progress.
+   */
+  float getReconstructionQueuesInitProgress();
 }
 }

+ 44 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemMBean.java

@@ -17,6 +17,7 @@
  */
  */
 package org.apache.hadoop.hdfs.server.namenode;
 package org.apache.hadoop.hdfs.server.namenode;
 
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNotNull;
 
 
@@ -33,9 +34,12 @@ import javax.management.ObjectName;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.metrics2.impl.ConfigBuilder;
 import org.apache.hadoop.metrics2.impl.ConfigBuilder;
 import org.apache.hadoop.metrics2.impl.TestMetricsConfig;
 import org.apache.hadoop.metrics2.impl.TestMetricsConfig;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Test;
 import org.junit.Test;
 import org.eclipse.jetty.util.ajax.JSON;
 import org.eclipse.jetty.util.ajax.JSON;
 
 
@@ -225,4 +229,44 @@ public class TestFSNamesystemMBean {
       }
       }
     }
     }
   }
   }
+
+  /**
+   * Test metrics associated with reconstructionQueuesInitProgress.
+   */
+  @Test
+  public void testReconstructionQueuesInitProgressMetrics() throws Exception {
+    Configuration conf = new Configuration();
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) {
+      cluster.waitActive();
+      final FSNamesystem fsNamesystem = cluster.getNamesystem();
+      final DistributedFileSystem fs = cluster.getFileSystem();
+
+      // Validate init reconstructionQueuesInitProgress value.
+      assertEquals(0.0, fsNamesystem.getReconstructionQueuesInitProgress(), 0);
+      MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
+      ObjectName mxbeanName =
+          new ObjectName("Hadoop:service=NameNode,name=FSNamesystemState");
+      float reconstructionQueuesInitProgress =
+          (float) mbs.getAttribute(mxbeanName, "ReconstructionQueuesInitProgress");
+      assertEquals(0.0, reconstructionQueuesInitProgress, 0);
+
+      // Create file.
+      Path file = new Path("/test");
+      long fileLength = 1024 * 1024 * 3;
+      DFSTestUtil.createFile(fs, file, fileLength, (short) 1, 0L);
+      DFSTestUtil.waitReplication(fs, file, (short) 1);
+
+      // Restart nameNode to run processMisReplicatedBlocks.
+      cluster.restartNameNode(true);
+
+      // Validate reconstructionQueuesInitProgress value.
+      GenericTestUtils.waitFor(
+          () -> cluster.getNamesystem().getReconstructionQueuesInitProgress() == 1.0,
+          100, 5 * 1000);
+
+      reconstructionQueuesInitProgress =
+          (float) mbs.getAttribute(mxbeanName, "ReconstructionQueuesInitProgress");
+      assertEquals(1.0, reconstructionQueuesInitProgress, 0);
+    }
+  }
 }
 }