فهرست منبع

HDFS-11468. Ozone: SCM: Add Node Metrics for SCM. Contributed by Yiqun Lin.

Yiqun Lin 7 سال پیش
والد
کامیت
6783dad766

+ 41 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/StorageContainerManager.java

@@ -69,6 +69,8 @@ import org.apache.hadoop.ozone.scm.block.BlockManager;
 import org.apache.hadoop.ozone.scm.block.BlockManagerImpl;
 import org.apache.hadoop.ozone.scm.container.ContainerMapping;
 import org.apache.hadoop.ozone.scm.container.Mapping;
+import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
+import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
 import org.apache.hadoop.ozone.scm.exceptions.SCMException;
 import org.apache.hadoop.ozone.scm.node.NodeManager;
 import org.apache.hadoop.ozone.scm.node.SCMNodeManager;
@@ -163,6 +165,9 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
   private final String scmUsername;
   private final Collection<String> scmAdminUsernames;
 
+  /** SCM metrics. */
+  private static SCMMetrics metrics;
+
   /**
    * Creates a new StorageContainerManager.  Configuration will be updated with
    * information on the actual listening addresses used for RPC servers.
@@ -177,6 +182,7 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
     final int cacheSize = conf.getInt(OZONE_SCM_DB_CACHE_SIZE_MB,
         OZONE_SCM_DB_CACHE_SIZE_DEFAULT);
 
+    StorageContainerManager.initMetrics();
     // TODO : Fix the ClusterID generation code.
     scmNodeManager = new SCMNodeManager(conf, UUID.randomUUID().toString());
     scmContainerManager = new ContainerMapping(conf, scmNodeManager, cacheSize);
@@ -673,6 +679,7 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
       LOG.error("SCM block manager service stop failed.", ex);
     }
 
+    metrics.unRegister();
     unregisterMXBean();
     IOUtils.cleanupWithLogger(LOG, scmContainerManager);
     IOUtils.cleanupWithLogger(LOG, scmBlockManager);
@@ -752,6 +759,27 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
   @Override
   public ContainerReportsResponseProto sendContainerReport(
       ContainerReportsRequestProto reports) throws IOException {
+    // TODO: We should update the logic once incremental container report
+    // type is supported.
+    if (reports.getType() ==
+        ContainerReportsRequestProto.reportType.fullReport) {
+      ContainerStat stat = new ContainerStat();
+      for (StorageContainerDatanodeProtocolProtos.ContainerInfo info : reports
+          .getReportsList()) {
+        stat.add(new ContainerStat(info.getSize(), info.getUsed(),
+            info.getKeyCount(), info.getReadBytes(), info.getWriteBytes(),
+            info.getReadCount(), info.getWriteCount()));
+      }
+
+      // update container metrics
+      metrics.setLastContainerReportSize(stat.getSize().get());
+      metrics.setLastContainerReportUsed(stat.getUsed().get());
+      metrics.setLastContainerReportKeyCount(stat.getKeyCount().get());
+      metrics.setLastContainerReportReadBytes(stat.getReadBytes().get());
+      metrics.setLastContainerReportWriteBytes(stat.getWriteBytes().get());
+      metrics.setLastContainerReportReadCount(stat.getReadCount().get());
+      metrics.setLastContainerReportWriteCount(stat.getWriteCount().get());
+    }
 
     // TODO: handle the container reports either here or add container report
     // handler.
@@ -914,4 +942,17 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
     }
   }
 
+  /**
+   * Creates the SCM metrics object through {@link SCMMetrics#create()} and
+   * stores it in the static metrics field of this class.
+   */
+  public static void initMetrics() {
+    StorageContainerManager.metrics = SCMMetrics.create();
+  }
+
+  /**
+   * Return SCM metrics instance.
+   *
+   * If the metrics have not been initialized yet, they are created once via
+   * {@link SCMMetrics#create()} and kept in the static field, so repeated
+   * calls hand back the same instance instead of creating (and registering)
+   * a new metrics source on every call.
+   *
+   * @return the shared SCM metrics instance.
+   */
+  public static SCMMetrics getMetrics() {
+    if (metrics == null) {
+      // Cache the lazily created instance; registering the same source
+      // name again on each call would hand every caller a different object.
+      metrics = SCMMetrics.create();
+    }
+    return metrics;
+  }
 }

+ 128 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/ContainerStat.java

@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm.container.placement.metrics;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * This class represents the SCM container stat.
+ *
+ * Every value is held in a {@link LongMetric}; {@link #add(ContainerStat)}
+ * accumulates the values of another stat into this one.
+ */
+public class ContainerStat {
+  /**
+   * The maximum container size.
+   */
+  private final LongMetric size;
+
+  /**
+   * The number of bytes used by the container.
+   */
+  private final LongMetric used;
+
+  /**
+   * The number of keys in the container.
+   */
+  private final LongMetric keyCount;
+
+  /**
+   * The number of bytes read from the container.
+   */
+  private final LongMetric readBytes;
+
+  /**
+   * The number of bytes written into the container.
+   */
+  private final LongMetric writeBytes;
+
+  /**
+   * The number of times the container is read.
+   */
+  private final LongMetric readCount;
+
+  /**
+   * The number of times the container is written into.
+   */
+  private final LongMetric writeCount;
+
+  /**
+   * Creates a stat with every value set to zero.
+   */
+  public ContainerStat() {
+    this(0L, 0L, 0L, 0L, 0L, 0L, 0L);
+  }
+
+  /**
+   * Creates a stat from the given values.
+   *
+   * @param size the maximum container size.
+   * @param used the number of bytes used by the container.
+   * @param keyCount the number of keys in the container.
+   * @param readBytes the number of bytes read from the container.
+   * @param writeBytes the number of bytes written into the container.
+   * @param readCount the number of times the container is read.
+   * @param writeCount the number of times the container is written into.
+   * @throws IllegalArgumentException if any of the values is negative.
+   */
+  public ContainerStat(long size, long used, long keyCount, long readBytes,
+      long writeBytes, long readCount, long writeCount) {
+    Preconditions.checkArgument(size >= 0,
+        "Container size cannot be negative.");
+    Preconditions.checkArgument(used >= 0,
+        "Used space cannot be negative.");
+    Preconditions.checkArgument(keyCount >= 0,
+        "Key count cannot be negative");
+    Preconditions.checkArgument(readBytes >= 0,
+        "Read bytes read cannot be negative.");
+    // Validate writeBytes here; checking readBytes a second time would let
+    // a negative writeBytes value slip through.
+    Preconditions.checkArgument(writeBytes >= 0,
+        "Write bytes cannot be negative.");
+    Preconditions.checkArgument(readCount >= 0,
+        "Read count cannot be negative.");
+    Preconditions.checkArgument(writeCount >= 0,
+        "Write count cannot be negative");
+
+    this.size = new LongMetric(size);
+    this.used = new LongMetric(used);
+    this.keyCount = new LongMetric(keyCount);
+    this.readBytes = new LongMetric(readBytes);
+    this.writeBytes = new LongMetric(writeBytes);
+    this.readCount = new LongMetric(readCount);
+    this.writeCount = new LongMetric(writeCount);
+  }
+
+  /**
+   * @return the metric holding the maximum container size.
+   */
+  public LongMetric getSize() {
+    return size;
+  }
+
+  /**
+   * @return the metric holding the number of bytes used.
+   */
+  public LongMetric getUsed() {
+    return used;
+  }
+
+  /**
+   * @return the metric holding the number of keys.
+   */
+  public LongMetric getKeyCount() {
+    return keyCount;
+  }
+
+  /**
+   * @return the metric holding the number of bytes read.
+   */
+  public LongMetric getReadBytes() {
+    return readBytes;
+  }
+
+  /**
+   * @return the metric holding the number of bytes written.
+   */
+  public LongMetric getWriteBytes() {
+    return writeBytes;
+  }
+
+  /**
+   * @return the metric holding the number of reads.
+   */
+  public LongMetric getReadCount() {
+    return readCount;
+  }
+
+  /**
+   * @return the metric holding the number of writes.
+   */
+  public LongMetric getWriteCount() {
+    return writeCount;
+  }
+
+  /**
+   * Adds each value of the given stat to the matching value of this stat.
+   *
+   * @param stat the stat whose values are added to this one.
+   */
+  public void add(ContainerStat stat) {
+    this.size.add(stat.getSize().get());
+    this.used.add(stat.getUsed().get());
+    this.keyCount.add(stat.getKeyCount().get());
+    this.readBytes.add(stat.getReadBytes().get());
+    this.writeBytes.add(stat.getWriteBytes().get());
+    this.readCount.add(stat.getReadCount().get());
+    this.writeCount.add(stat.getWriteCount().get());
+  }
+}

+ 87 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/ozone/scm/container/placement/metrics/SCMMetrics.java

@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm.container.placement.metrics;
+
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+
+/**
+ * Metrics source that holds the StorageContainerManager statistics.
+ */
+@Metrics(about="Storage Container Manager Metrics", context="dfs")
+public class SCMMetrics {
+  public static final String SOURCE_NAME =
+      SCMMetrics.class.getSimpleName();
+
+  /**
+   * Gauges for the container stat values; what each value means is
+   * described in {@link ContainerStat}.
+   */
+  @Metric private MutableGaugeLong lastContainerReportSize;
+  @Metric private MutableGaugeLong lastContainerReportUsed;
+  @Metric private MutableGaugeLong lastContainerReportKeyCount;
+  @Metric private MutableGaugeLong lastContainerReportReadBytes;
+  @Metric private MutableGaugeLong lastContainerReportWriteBytes;
+  @Metric private MutableGaugeLong lastContainerReportReadCount;
+  @Metric private MutableGaugeLong lastContainerReportWriteCount;
+
+  public SCMMetrics() {
+  }
+
+  /**
+   * Builds a new SCMMetrics object and registers it with the default
+   * metrics system under {@link #SOURCE_NAME}.
+   *
+   * @return the value returned by the metrics system registration.
+   */
+  public static SCMMetrics create() {
+    MetricsSystem metricsSystem = DefaultMetricsSystem.instance();
+    SCMMetrics source = new SCMMetrics();
+    return metricsSystem.register(SOURCE_NAME,
+        "Storage Container Manager Metrics", source);
+  }
+
+  /** Sets the size gauge of the last container report. */
+  public void setLastContainerReportSize(long size) {
+    lastContainerReportSize.set(size);
+  }
+
+  /** Sets the used-bytes gauge of the last container report. */
+  public void setLastContainerReportUsed(long used) {
+    lastContainerReportUsed.set(used);
+  }
+
+  /** Sets the key-count gauge of the last container report. */
+  public void setLastContainerReportKeyCount(long keyCount) {
+    lastContainerReportKeyCount.set(keyCount);
+  }
+
+  /** Sets the read-bytes gauge of the last container report. */
+  public void setLastContainerReportReadBytes(long readBytes) {
+    lastContainerReportReadBytes.set(readBytes);
+  }
+
+  /** Sets the write-bytes gauge of the last container report. */
+  public void setLastContainerReportWriteBytes(long writeBytes) {
+    lastContainerReportWriteBytes.set(writeBytes);
+  }
+
+  /** Sets the read-count gauge of the last container report. */
+  public void setLastContainerReportReadCount(long readCount) {
+    lastContainerReportReadCount.set(readCount);
+  }
+
+  /** Sets the write-count gauge of the last container report. */
+  public void setLastContainerReportWriteCount(long writeCount) {
+    lastContainerReportWriteCount.set(writeCount);
+  }
+
+  /**
+   * Removes the source named {@link #SOURCE_NAME} from the default metrics
+   * system.
+   */
+  public void unRegister() {
+    DefaultMetricsSystem.instance().unregisterSource(SOURCE_NAME);
+  }
+}

+ 20 - 0
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/OzoneMetrics.md

@@ -98,6 +98,26 @@ RPC operations.
 | `GetSmallFile` | Get small file operations |
 | `CloseContainer` | Close container operations |
 
+### Storage Container Manager Metrics
+
+The metrics for containers that are managed by Storage Container Manager.
+
+Storage Container Manager (SCM) is a master service which keeps track of
+replicas of storage containers. It also manages all data nodes and their
+states, dealing with container reports and dispatching commands for execution.
+
+Following are the counters for containers:
+
+| Name | Description |
+|:---- |:---- |
+| `LastContainerReportSize` | Total size in bytes of all containers |
+| `LastContainerReportUsed` | Total number of bytes used by all containers |
+| `LastContainerReportKeyCount` | Total number of keys in all containers |
+| `LastContainerReportReadBytes` | Total number of bytes that have been read from all containers |
+| `LastContainerReportWriteBytes` | Total number of bytes that have been written into all containers |
+| `LastContainerReportReadCount` | Total number of times containers have been read from |
+| `LastContainerReportWriteCount` | Total number of times containers have been written to |
+
 ### Key Space Metrics
 
 The metrics for various key space manager operations in HDFS Ozone.

+ 116 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/ozone/scm/TestSCMMetrics.java

@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.scm;
+
+import static org.apache.hadoop.test.MetricsAsserts.getLongGauge;
+import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.assertEquals;
+
+import java.util.UUID;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.hadoop.conf.OzoneConfiguration;
+import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.container.common.SCMTestUtils;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerReport;
+import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
+import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
+import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
+import org.junit.Test;
+
+/**
+ * Tests for the metrics exposed by Storage Container Manager.
+ */
+public class TestSCMMetrics {
+  private static MiniOzoneCluster cluster = null;
+
+  /**
+   * Sends a single full container report to the SCM of a mini cluster and
+   * checks that each LastContainerReport* gauge holds the per-container
+   * value multiplied by the number of containers in the report.
+   */
+  @Test
+  public void testContainerMetrics() throws Exception {
+    final int nodeCount = 2;
+    final int numReport = 2;
+    final long size = OzoneConsts.GB * 5;
+    final long used = OzoneConsts.GB * 2;
+    final long readBytes = OzoneConsts.GB * 1;
+    final long writeBytes = OzoneConsts.GB * 2;
+    final int keyCount = 1000;
+    final int readCount = 100;
+    final int writeCount = 50;
+    OzoneConfiguration conf = new OzoneConfiguration();
+
+    try {
+      cluster = new MiniOzoneCluster.Builder(conf)
+          .setHandlerType(OzoneConsts.OZONE_HANDLER_DISTRIBUTED)
+          .numDataNodes(nodeCount).build();
+
+      ContainerStat perContainer = new ContainerStat(size, used, keyCount,
+          readBytes, writeBytes, readCount, writeCount);
+      StorageContainerManager scm = cluster.getStorageContainerManager();
+      ContainerReportsRequestProto request =
+          createContainerReport(numReport, perContainer);
+      scm.sendContainerReport(request);
+
+      // verify container stat metrics
+      MetricsRecordBuilder snapshot = getMetrics(SCMMetrics.SOURCE_NAME);
+      assertGauge(size * numReport, "LastContainerReportSize", snapshot);
+      assertGauge(used * numReport, "LastContainerReportUsed", snapshot);
+      assertGauge(readBytes * numReport,
+          "LastContainerReportReadBytes", snapshot);
+      assertGauge(writeBytes * numReport,
+          "LastContainerReportWriteBytes", snapshot);
+
+      assertGauge(keyCount * numReport,
+          "LastContainerReportKeyCount", snapshot);
+      assertGauge(readCount * numReport,
+          "LastContainerReportReadCount", snapshot);
+      assertGauge(writeCount * numReport,
+          "LastContainerReportWriteCount", snapshot);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Asserts that the named long gauge in the given metrics snapshot has the
+   * expected value.
+   */
+  private static void assertGauge(long expected, String gaugeName,
+      MetricsRecordBuilder snapshot) {
+    assertEquals(expected, getLongGauge(gaugeName, snapshot));
+  }
+
+  /**
+   * Builds a full container report request that contains numReport
+   * container entries, each carrying the values of the given stat.
+   */
+  private ContainerReportsRequestProto createContainerReport(int numReport,
+      ContainerStat stat) {
+    ContainerReportsRequestProto.Builder builder =
+        ContainerReportsRequestProto.newBuilder();
+
+    for (int idx = 0; idx < numReport; idx++) {
+      ContainerReport entry = new ContainerReport(
+          UUID.randomUUID().toString(), DigestUtils.sha256Hex("Simulated"));
+      entry.setSize(stat.getSize().get());
+      entry.setBytesUsed(stat.getUsed().get());
+      entry.setReadCount(stat.getReadCount().get());
+      entry.setReadBytes(stat.getReadBytes().get());
+      entry.setKeyCount(stat.getKeyCount().get());
+      entry.setWriteCount(stat.getWriteCount().get());
+      entry.setWriteBytes(stat.getWriteBytes().get());
+      builder.addReports(entry.getProtoBufMessage());
+    }
+    builder.setDatanodeID(SCMTestUtils.getDatanodeID().getProtoBufMessage());
+    builder.setType(StorageContainerDatanodeProtocolProtos
+        .ContainerReportsRequestProto.reportType.fullReport);
+    return builder.build();
+  }
+}