Browse Source

HDFS-14167. RBF: Add stale nodes to federation metrics. Contributed by Inigo Goiri.

Inigo Goiri 6 years ago
parent
commit
1dc01e59af
11 changed files with 60 additions and 4 deletions
  1. 6 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java
  2. 6 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java
  3. 6 1
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java
  4. 1 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java
  5. 15 3
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java
  6. 1 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java
  7. 4 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java
  8. 10 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java
  9. 1 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto
  10. 7 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java
  11. 3 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java

@@ -106,6 +106,12 @@ public interface FederationMBean {
    */
   int getNumDeadNodes();
 
+  /**
+   * Get the number of stale datanodes.
+   * @return Number of stale datanodes.
+   */
+  int getNumStaleNodes();
+
   /**
    * Get the number of decommissioning datanodes.
    * @return Number of decommissioning datanodes.

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java

@@ -413,6 +413,12 @@ public class FederationMetrics implements FederationMBean {
     return getNameserviceAggregatedInt(MembershipStats::getNumOfDeadDatanodes);
   }
 
+  @Override
+  public int getNumStaleNodes() {
+    return getNameserviceAggregatedInt(
+        MembershipStats::getNumOfStaleDatanodes);
+  }
+
   @Override
   public int getNumDecommissioningNodes() {
     return getNameserviceAggregatedInt(

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java

@@ -631,7 +631,12 @@ public class NamenodeBeanMetrics
 
   @Override
   public int getNumStaleDataNodes() {
-    return -1;
+    try {
+      return getFederationMetrics().getNumStaleNodes();
+    } catch (IOException e) {
+      LOG.debug("Failed to get number of stale nodes: {}", e.getMessage());
+    }
+    return 0;
   }
 
   @Override

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java

@@ -280,6 +280,7 @@ public class MembershipNamenodeResolver
           report.getNumDecommissioningDatanodes());
       stats.setNumOfActiveDatanodes(report.getNumLiveDatanodes());
       stats.setNumOfDeadDatanodes(report.getNumDeadDatanodes());
+      stats.setNumOfStaleDatanodes(report.getNumStaleDatanodes());
       stats.setNumOfDecomActiveDatanodes(report.getNumDecomLiveDatanodes());
       stats.setNumOfDecomDeadDatanodes(report.getNumDecomDeadDatanodes());
       record.setStats(stats);

+ 15 - 3
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java

@@ -42,6 +42,7 @@ public class NamenodeStatusReport {
   /** Datanodes stats. */
   private int liveDatanodes = -1;
   private int deadDatanodes = -1;
+  private int staleDatanodes = -1;
   /** Decommissioning datanodes. */
   private int decomDatanodes = -1;
   /** Live decommissioned datanodes. */
@@ -223,14 +224,16 @@ public class NamenodeStatusReport {
    *
    * @param numLive Number of live nodes.
    * @param numDead Number of dead nodes.
+   * @param numStale Number of stale nodes.
    * @param numDecom Number of decommissioning nodes.
    * @param numLiveDecom Number of decommissioned live nodes.
    * @param numDeadDecom Number of decommissioned dead nodes.
    */
-  public void setDatanodeInfo(int numLive, int numDead, int numDecom,
-      int numLiveDecom, int numDeadDecom) {
+  public void setDatanodeInfo(int numLive, int numDead, int numStale,
+      int numDecom, int numLiveDecom, int numDeadDecom) {
     this.liveDatanodes = numLive;
     this.deadDatanodes = numDead;
+    this.staleDatanodes = numStale;
     this.decomDatanodes = numDecom;
     this.liveDecomDatanodes = numLiveDecom;
     this.deadDecomDatanodes = numDeadDecom;
@@ -247,7 +250,7 @@ public class NamenodeStatusReport {
   }
 
   /**
-   * Get the number of dead blocks.
+   * Get the number of dead nodes.
    *
    * @return The number of dead nodes.
    */
@@ -255,6 +258,15 @@ public class NamenodeStatusReport {
     return this.deadDatanodes;
   }
 
+  /**
+   * Get the number of stale nodes.
+   *
+   * @return The number of stale nodes.
+   */
+  public int getNumStaleDatanodes() {
+    return this.staleDatanodes;
+  }
+
   /**
    * Get the number of decommissionining nodes.
    *

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java

@@ -338,6 +338,7 @@ public class NamenodeHeartbeatService extends PeriodicService {
             report.setDatanodeInfo(
                 jsonObject.getInt("NumLiveDataNodes"),
                 jsonObject.getInt("NumDeadDataNodes"),
+                jsonObject.getInt("NumStaleDataNodes"),
                 jsonObject.getInt("NumDecommissioningDataNodes"),
                 jsonObject.getInt("NumDecomLiveDataNodes"),
                 jsonObject.getInt("NumDecomDeadDataNodes"));

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java

@@ -81,6 +81,10 @@ public abstract class MembershipStats extends BaseRecord {
 
   public abstract int getNumOfDeadDatanodes();
 
+  public abstract void setNumOfStaleDatanodes(int nodes);
+
+  public abstract int getNumOfStaleDatanodes();
+
   public abstract void setNumOfDecommissioningDatanodes(int nodes);
 
   public abstract int getNumOfDecommissioningDatanodes();

+ 10 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java

@@ -168,6 +168,16 @@ public class MembershipStatsPBImpl extends MembershipStats
     return this.translator.getProtoOrBuilder().getNumOfDeadDatanodes();
   }
 
+  @Override
+  public void setNumOfStaleDatanodes(int nodes) {
+    this.translator.getBuilder().setNumOfStaleDatanodes(nodes);
+  }
+
+  @Override
+  public int getNumOfStaleDatanodes() {
+    return this.translator.getProtoOrBuilder().getNumOfStaleDatanodes();
+  }
+
   @Override
   public void setNumOfDecommissioningDatanodes(int nodes) {
     this.translator.getBuilder().setNumOfDecommissioningDatanodes(nodes);

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto

@@ -45,6 +45,7 @@ message NamenodeMembershipStatsRecordProto {
   optional uint32 numOfDecommissioningDatanodes = 22;
   optional uint32 numOfDecomActiveDatanodes = 23;
   optional uint32 numOfDecomDeadDatanodes = 24;
+  optional uint32 numOfStaleDatanodes = 25;
 }
 
 message NamenodeMembershipRecordProto {

+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java

@@ -137,6 +137,8 @@ public class TestFederationMetrics extends TestMetricsBase {
           stats.getNumOfActiveDatanodes());
       assertEquals(json.getLong("numOfDeadDatanodes"),
           stats.getNumOfDeadDatanodes());
+      assertEquals(json.getLong("numOfStaleDatanodes"),
+          stats.getNumOfStaleDatanodes());
       assertEquals(json.getLong("numOfDecommissioningDatanodes"),
           stats.getNumOfDecommissioningDatanodes());
       assertEquals(json.getLong("numOfDecomActiveDatanodes"),
@@ -187,6 +189,8 @@ public class TestFederationMetrics extends TestMetricsBase {
           json.getLong("numOfActiveDatanodes"));
       assertEquals(stats.getNumOfDeadDatanodes(),
           json.getLong("numOfDeadDatanodes"));
+      assertEquals(stats.getNumOfStaleDatanodes(),
+          json.getLong("numOfStaleDatanodes"));
       assertEquals(stats.getNumOfDecommissioningDatanodes(),
           json.getLong("numOfDecommissioningDatanodes"));
       assertEquals(stats.getNumOfDecomActiveDatanodes(),
@@ -260,6 +264,7 @@ public class TestFederationMetrics extends TestMetricsBase {
     long numBlocks = 0;
     long numLive = 0;
     long numDead = 0;
+    long numStale = 0;
     long numDecom = 0;
     long numDecomLive = 0;
     long numDecomDead = 0;
@@ -269,6 +274,7 @@ public class TestFederationMetrics extends TestMetricsBase {
       numBlocks += stats.getNumOfBlocks();
       numLive += stats.getNumOfActiveDatanodes();
       numDead += stats.getNumOfDeadDatanodes();
+      numStale += stats.getNumOfStaleDatanodes();
       numDecom += stats.getNumOfDecommissioningDatanodes();
       numDecomLive += stats.getNumOfDecomActiveDatanodes();
       numDecomDead += stats.getNumOfDecomDeadDatanodes();
@@ -277,6 +283,7 @@ public class TestFederationMetrics extends TestMetricsBase {
     assertEquals(numBlocks, bean.getNumBlocks());
     assertEquals(numLive, bean.getNumLiveNodes());
     assertEquals(numDead, bean.getNumDeadNodes());
+    assertEquals(numStale, bean.getNumStaleNodes());
     assertEquals(numDecom, bean.getNumDecommissioningNodes());
     assertEquals(numDecomLive, bean.getNumDecomLiveNodes());
     assertEquals(numDecomDead, bean.getNumDecomDeadNodes());

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java

@@ -47,6 +47,7 @@ public class TestMembershipState {
   private static final long NUM_BLOCKS = 300;
   private static final long NUM_FILES = 400;
   private static final int NUM_DEAD = 500;
+  private static final int NUM_STALE = 550;
   private static final int NUM_ACTIVE = 600;
   private static final int NUM_DECOM = 700;
   private static final int NUM_DECOM_ACTIVE = 800;
@@ -73,6 +74,7 @@ public class TestMembershipState {
     stats.setNumOfFiles(NUM_FILES);
     stats.setNumOfActiveDatanodes(NUM_ACTIVE);
     stats.setNumOfDeadDatanodes(NUM_DEAD);
+    stats.setNumOfStaleDatanodes(NUM_STALE);
     stats.setNumOfDecommissioningDatanodes(NUM_DECOM);
     stats.setNumOfDecomActiveDatanodes(NUM_DECOM_ACTIVE);
     stats.setNumOfDecomDeadDatanodes(NUM_DECOM_DEAD);
@@ -101,6 +103,7 @@ public class TestMembershipState {
     assertEquals(NUM_FILES, stats.getNumOfFiles());
     assertEquals(NUM_ACTIVE, stats.getNumOfActiveDatanodes());
     assertEquals(NUM_DEAD, stats.getNumOfDeadDatanodes());
+    assertEquals(NUM_STALE, stats.getNumOfStaleDatanodes());
     assertEquals(NUM_DECOM, stats.getNumOfDecommissioningDatanodes());
     assertEquals(NUM_DECOM_ACTIVE, stats.getNumOfDecomActiveDatanodes());
     assertEquals(NUM_DECOM_DEAD, stats.getNumOfDecomDeadDatanodes());