Pārlūkot izejas kodu

YARN-5336 Limit the flow name size & consider cleanup for hex chars. Contributed by Sushil Ks

Vrushali C 6 gadi atpakaļ
vecāks
revīzija
0ec962ac8f

+ 13 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -2782,6 +2782,19 @@ public class YarnConfiguration extends Configuration {
   public static final String DEFAULT_HDFS_LOCATION_FLOW_RUN_COPROCESSOR_JAR =
       "/hbase/coprocessor/hadoop-yarn-server-timelineservice.jar";
 
+  /**
+   * Name of the setting that caps the length of the flow name generated
+   * in ATSv2; the cap is applied after removing the UUID, if present.
+   */
+  public static final String FLOW_NAME_MAX_SIZE =
+      TIMELINE_SERVICE_PREFIX + "flowname.max-size";
+
+  /**
+   * Default maximum flow name size. A value of 0 (or less) means no size
+   * restriction is applied after removing the UUID, if present.
+   */
+  public static final int FLOW_NAME_DEFAULT_MAX_SIZE = 0;
+
     /**
    * The name for setting that points to an optional HBase configuration
    * (hbase-site.xml file) with settings that will override the ones found on

+ 32 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java

@@ -24,6 +24,8 @@ import java.net.InetSocketAddress;
 import com.fasterxml.jackson.core.JsonGenerationException;
 import com.fasterxml.jackson.databind.JsonMappingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.conf.Configuration;
@@ -182,6 +184,36 @@ public class TimelineUtils {
     return FLOW_NAME_TAG_PREFIX + ":" + flowName;
   }
 
+  /**
+   * Strips any UUID from the flow name, then truncates the result to the
+   * size configured under {@link YarnConfiguration#FLOW_NAME_MAX_SIZE}.
+   * @param flowName flow name to shorten; null is returned as null
+   * @param conf configuration supplying the max size (non-positive: no limit)
+   * @return flow name without UUID, cut to the max size when it is positive
+   */
+  public static String shortenFlowName(String flowName, Configuration conf) {
+    if (flowName == null) {
+      return null;
+    }
+    // strip the UUID (with its leading '-', if any) from the flow name
+    flowName = removeUUID(flowName);
+    // read the size limit, passing the default constant as the fallback
+    int length = conf.getInt(YarnConfiguration.FLOW_NAME_MAX_SIZE,
+        YarnConfiguration.FLOW_NAME_DEFAULT_MAX_SIZE);
+    if (length <= 0) { // non-positive limit: keep the full name
+      return flowName;
+    }
+    return StringUtils.substring(flowName, 0, length);
+  }
+
+  @VisibleForTesting
+  static String removeUUID(String flowName) { // drops every UUID-shaped run
+    flowName = StringUtils.replaceAll(flowName,
+        "-?([a-fA-F0-9]{8}-[a-fA-F0-9]{4}-" + // optional '-', hex 8-4-
+        "[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}){1}", ""); // 4-4-12
+    return flowName;
+  }
+
   /**
    * Generate flow version tag.
    *

+ 11 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -2660,6 +2660,17 @@
     <value></value>
   </property>
 
+  <property>
+    <description>
+      Removes the UUID if present and limits the flow name length to
+      the given value for ATSv2. In case the value is negative or 0,
+      it only removes the UUID and does not limit the flow name.
+    </description>
+    <name>yarn.timeline-service.flowname.max-size
+    </name>
+    <value>0</value>
+  </property>
+
   <!--  Shared Cache Configuration -->
 
   <property>

+ 52 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/timeline/TestShortenedFlowName.java

@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.util.timeline;
+
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.UUID;
+
+/**
+ * Tests UUID removal and max-size truncation of flow names in TimelineUtils.
+ */
+public class TestShortenedFlowName {
+
+  private static final String TEST_FLOW_NAME = "TestFlowName";
+
+  @Test
+  public void testRemovingUUID() {
+    String flowName = TEST_FLOW_NAME + "-" + UUID.randomUUID();
+    flowName = TimelineUtils.removeUUID(flowName);
+    Assert.assertEquals(TEST_FLOW_NAME, flowName); // '-' and UUID both gone
+  }
+
+  @Test
+  public void testShortenedFlowName() {
+    YarnConfiguration conf = new YarnConfiguration();
+    String flowName = TEST_FLOW_NAME + UUID.randomUUID(); // no '-' separator
+    conf.setInt(YarnConfiguration.FLOW_NAME_MAX_SIZE, 8);
+    String shortenedFlowName = TimelineUtils.shortenFlowName(flowName, conf);
+    Assert.assertEquals("TestFlow", shortenedFlowName); // first 8 characters
+    conf.setInt(YarnConfiguration.FLOW_NAME_MAX_SIZE,
+        YarnConfiguration.FLOW_NAME_DEFAULT_MAX_SIZE); // 0: no size limit
+    shortenedFlowName = TimelineUtils.shortenFlowName(flowName, conf);
+    Assert.assertEquals(TEST_FLOW_NAME, shortenedFlowName);
+  }
+}

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/TimelineContext.java

@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.yarn.server.timelineservice;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
+
 /**
  * Encapsulates timeline context information.
  */
@@ -28,6 +32,7 @@ public class TimelineContext {
   private String flowName;
   private Long flowRunId;
   private String appId;
+  private static final Configuration DEFAULT_CONF = new YarnConfiguration();
 
   public TimelineContext() {
     this(null, null, null, 0L, null);
@@ -99,7 +104,7 @@ public class TimelineContext {
       Long flowRunId, String appId) {
     this.clusterId = clusterId;
     this.userId = userId;
-    this.flowName = flowName;
+    this.flowName = TimelineUtils.shortenFlowName(flowName, DEFAULT_CONF);
     this.flowRunId = flowRunId;
     this.appId = appId;
   }
@@ -125,7 +130,7 @@ public class TimelineContext {
   }
 
   public void setFlowName(String flow) {
-    this.flowName = flow;
+    this.flowName = TimelineUtils.shortenFlowName(flow, DEFAULT_CONF);
   }
 
   public Long getFlowRunId() {