瀏覽代碼

HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1551427 13f79535-47bb-0310-9956-ffa450edef68
Andrew Wang 11 年之前
父節點
當前提交
e8b27cc5a2

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -156,6 +156,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
     HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
     (Liang Xie via junping_du)
     (Liang Xie via junping_du)
 
 
+    HDFS-5350. Name Node should report fsimage transfer time as a metric.
+    (Jimmy Xiang via wang)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)

+ 22 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java

@@ -17,6 +17,8 @@
  */
  */
 package org.apache.hadoop.hdfs.server.namenode;
 package org.apache.hadoop.hdfs.server.namenode;
 
 
+import static org.apache.hadoop.util.Time.now;
+
 import java.security.PrivilegedExceptionAction;
 import java.security.PrivilegedExceptionAction;
 import java.util.*;
 import java.util.*;
 import java.io.*;
 import java.io.*;
@@ -42,6 +44,7 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.server.common.JspHelper;
 import org.apache.hadoop.hdfs.server.common.JspHelper;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
+import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
@@ -89,6 +92,7 @@ public class GetImageServlet extends HttpServlet {
       final GetImageParams parsedParams = new GetImageParams(request, response);
       final GetImageParams parsedParams = new GetImageParams(request, response);
       final Configuration conf = 
       final Configuration conf = 
         (Configuration)getServletContext().getAttribute(JspHelper.CURRENT_CONF);
         (Configuration)getServletContext().getAttribute(JspHelper.CURRENT_CONF);
+      final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
       
       
       if (UserGroupInformation.isSecurityEnabled() && 
       if (UserGroupInformation.isSecurityEnabled() && 
           !isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
           !isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
@@ -129,14 +133,26 @@ public class GetImageServlet extends HttpServlet {
               throw new IOException(errorMessage);
               throw new IOException(errorMessage);
             }
             }
             CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
             CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
+            long start = now();
             serveFile(imageFile);
             serveFile(imageFile);
+
+            if (metrics != null) { // Metrics non-null only when used inside name node
+              long elapsed = now() - start;
+              metrics.addGetImage(elapsed);
+            }
           } else if (parsedParams.isGetEdit()) {
           } else if (parsedParams.isGetEdit()) {
             long startTxId = parsedParams.getStartTxId();
             long startTxId = parsedParams.getStartTxId();
             long endTxId = parsedParams.getEndTxId();
             long endTxId = parsedParams.getEndTxId();
             
             
             File editFile = nnImage.getStorage()
             File editFile = nnImage.getStorage()
                 .findFinalizedEditsFile(startTxId, endTxId);
                 .findFinalizedEditsFile(startTxId, endTxId);
+            long start = now();
             serveFile(editFile);
             serveFile(editFile);
+
+            if (metrics != null) { // Metrics non-null only when used inside name node
+              long elapsed = now() - start;
+              metrics.addGetEdit(elapsed);
+            }
           } else if (parsedParams.isPutImage()) {
           } else if (parsedParams.isPutImage()) {
             final long txid = parsedParams.getTxId();
             final long txid = parsedParams.getTxId();
 
 
@@ -160,12 +176,18 @@ public class GetImageServlet extends HttpServlet {
                 UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
                 UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
               }
               }
               
               
+              long start = now();
               // issue an HTTP GET request to download the new fsimage 
               // issue an HTTP GET request to download the new fsimage 
               MD5Hash downloadImageDigest =
               MD5Hash downloadImageDigest =
                 TransferFsImage.downloadImageToStorage(
                 TransferFsImage.downloadImageToStorage(
                         parsedParams.getInfoServer(), txid,
                         parsedParams.getInfoServer(), txid,
                         nnImage.getStorage(), true);
                         nnImage.getStorage(), true);
               nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
               nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
+
+              if (metrics != null) { // Metrics non-null only when used inside name node
+                long elapsed = now() - start;
+                metrics.addPutImage(elapsed);
+              }
               
               
               // Now that we have a new checkpoint, we might be able to
               // Now that we have a new checkpoint, we might be able to
               // remove some old ones.
               // remove some old ones.

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java

@@ -85,6 +85,13 @@ public class NameNodeMetrics {
   @Metric("Time loading FS Image at startup in msec")
   @Metric("Time loading FS Image at startup in msec")
   MutableGaugeInt fsImageLoadTime;
   MutableGaugeInt fsImageLoadTime;
 
 
+  @Metric("GetImageServlet getEdit")
+  MutableRate getEdit;
+  @Metric("GetImageServlet getImage")
+  MutableRate getImage;
+  @Metric("GetImageServlet putImage")
+  MutableRate putImage;
+
   NameNodeMetrics(String processName, String sessionId, int[] intervals) {
   NameNodeMetrics(String processName, String sessionId, int[] intervals) {
     registry.tag(ProcessName, processName).tag(SessionId, sessionId);
     registry.tag(ProcessName, processName).tag(SessionId, sessionId);
     
     
@@ -232,4 +239,16 @@ public class NameNodeMetrics {
   public void setSafeModeTime(long elapsed) {
   public void setSafeModeTime(long elapsed) {
     safeModeTime.set((int) elapsed);
     safeModeTime.set((int) elapsed);
   }
   }
+
+  public void addGetEdit(long latency) {
+    getEdit.add(latency);
+  }
+
+  public void addGetImage(long latency) {
+    getImage.add(latency);
+  }
+
+  public void addPutImage(long latency) {
+    putImage.add(latency);
+  }
 }
 }

+ 13 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -20,6 +20,9 @@ package org.apache.hadoop.hdfs.server.namenode;
 import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
 import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
 import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
 import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
+import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNotNull;
@@ -74,6 +77,7 @@ import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
 import org.apache.hadoop.hdfs.tools.DFSAdmin;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
 import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
@@ -107,6 +111,7 @@ public class TestCheckpoint {
   }
   }
 
 
   static final Log LOG = LogFactory.getLog(TestCheckpoint.class); 
   static final Log LOG = LogFactory.getLog(TestCheckpoint.class); 
+  static final String NN_METRICS = "NameNodeActivity";
   
   
   static final long seed = 0xDEADBEEFL;
   static final long seed = 0xDEADBEEFL;
   static final int blockSize = 4096;
   static final int blockSize = 4096;
@@ -1055,6 +1060,14 @@ public class TestCheckpoint {
       //
       //
       secondary = startSecondaryNameNode(conf);
       secondary = startSecondaryNameNode(conf);
       secondary.doCheckpoint();
       secondary.doCheckpoint();
+
+      MetricsRecordBuilder rb = getMetrics(NN_METRICS);
+      assertCounterGt("GetImageNumOps", 0, rb);
+      assertCounterGt("GetEditNumOps", 0, rb);
+      assertCounterGt("PutImageNumOps", 0, rb);
+      assertGaugeGt("GetImageAvgTime", 0.0, rb);
+      assertGaugeGt("GetEditAvgTime", 0.0, rb);
+      assertGaugeGt("PutImageAvgTime", 0.0, rb);
     } finally {
     } finally {
       fileSys.close();
       fileSys.close();
       cleanup(secondary);
       cleanup(secondary);