Browse Source

HDFS-8096. DatanodeMetrics#blocksReplicated will get incremented early and even for failed transfers (Contributed by Vinayakumar B)

Vinayakumar B 10 years ago
parent
commit
9d8952f97f

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -451,6 +451,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-7725. Incorrect "nodes in service" metrics caused all writes to fail.
     (Ming Ma via wang)
 
+    HDFS-8096. DatanodeMetrics#blocksReplicated will get incremented early and
+    even for failed transfers (vinayakumarb)
+
 Release 2.7.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 0 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java

@@ -656,7 +656,6 @@ class BPOfferService {
       // Send a copy of a block to another datanode
       dn.transferBlocks(bcmd.getBlockPoolId(), bcmd.getBlocks(),
           bcmd.getTargets(), bcmd.getTargetStorageTypes());
-      dn.metrics.incrBlocksReplicated(bcmd.getBlocks().length);
       break;
     case DatanodeProtocol.DNA_INVALIDATE:
       //

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -2174,6 +2174,8 @@ public class DataNode extends ReconfigurableBase
                   + Arrays.asList(targets));
             }
           }
+        } else {
+          metrics.incrBlocksReplicated();
         }
       } catch (IOException ie) {
         LOG.warn(bpReg + ":Failed to transfer " + b + " to " +

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java

@@ -210,8 +210,8 @@ public class DataNodeMetrics {
     cacheReports.add(latency);
   }
 
-  public void incrBlocksReplicated(int delta) {
-    blocksReplicated.incr(delta);
+  public void incrBlocksReplicated() {
+    blocksReplicated.incr();
   }
 
   public void incrBlocksWritten() {

+ 33 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java

@@ -21,17 +21,15 @@ import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
 import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 
 import java.io.Closeable;
 import java.io.IOException;
 import java.lang.management.ManagementFactory;
 import java.util.List;
-import java.util.Map;
 
 import com.google.common.collect.Lists;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -45,6 +43,8 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.util.Time;
@@ -293,4 +293,33 @@ public class TestDataNodeMetrics {
     }
   }
 
+  @Test
+  public void testDatanodeBlocksReplicatedMetric() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      List<DataNode> datanodes = cluster.getDataNodes();
+      assertEquals(datanodes.size(), 1);
+      DataNode datanode = datanodes.get(0);
+
+      MetricsRecordBuilder rb = getMetrics(datanode.getMetrics().name());
+      long blocksReplicated = getLongCounter("BlocksReplicated", rb);
+      assertEquals("No blocks replicated yet", 0, blocksReplicated);
+
+      Path path = new Path("/counter.txt");
+      DFSTestUtil.createFile(fs, path, 1024, (short) 2, Time.monotonicNow());
+      cluster.startDataNodes(conf, 1, true, StartupOption.REGULAR, null);
+      ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fs, path);
+      DFSTestUtil.waitForReplication(cluster, firstBlock, 1, 2, 0);
+
+      MetricsRecordBuilder rbNew = getMetrics(datanode.getMetrics().name());
+      blocksReplicated = getLongCounter("BlocksReplicated", rbNew);
+      assertEquals("blocks replicated counter incremented", 1, blocksReplicated);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }