
AMBARI-7981 Use static mapred-site.xml file for MR by using ${hdp.version} variable (dsen)

Dmytro Sen, 10 years ago
Parent
Commit
af0253506f

+ 1 - 1
ambari-server/src/main/python/UpgradeHelper_HDP2.py

@@ -455,7 +455,7 @@ MAPRED_SITE = {
   "mapreduce.tasktracker.taskmemorymanager.monitoringinterval": "DELETE_OLD",
   "mapreduce.tasktracker.taskmemorymanager.monitoringinterval": "DELETE_OLD",
   "mapreduce.tasktracker.tasks.sleeptimebeforesigkill": "DELETE_OLD",
   "mapreduce.tasktracker.tasks.sleeptimebeforesigkill": "DELETE_OLD",
   "yarn.app.mapreduce.am.admin-command-opts": "-Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN",
   "yarn.app.mapreduce.am.admin-command-opts": "-Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN",
-  "yarn.app.mapreduce.am.command-opts": "-Xmx312m",
+  "yarn.app.mapreduce.am.command-opts": "-Xmx312m -Dhdp.version=${hdp.version}",
   "yarn.app.mapreduce.am.log.level": "INFO",
   "yarn.app.mapreduce.am.log.level": "INFO",
   "yarn.app.mapreduce.am.resource.mb": "512",
   "yarn.app.mapreduce.am.resource.mb": "512",
   "yarn.app.mapreduce.am.staging-dir": "/user"
   "yarn.app.mapreduce.am.staging-dir": "/user"

+ 2 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/scripts/historyserver.py

@@ -20,6 +20,7 @@ Ambari Agent
 """
 """
 import sys
 import sys
 from resource_management import *
 from resource_management import *
+from resource_management.libraries.functions.dynamic_variable_interpretation import copy_tarballs_to_hdfs
 
 
 from yarn import yarn
 from yarn import yarn
 from service import service
 from service import service
@@ -37,6 +38,7 @@ class HistoryServer(Script):
     import params
     env.set_params(params)
     self.configure(env) # FOR SECURITY
+    copy_tarballs_to_hdfs('mr', params.mapred_user, params.hdfs_user)
     service('historyserver', action='start', serviceName='mapreduce')

   def stop(self, env):
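
Reviewer note: copy_tarballs_to_hdfs is what stages the MR framework tarball in HDFS before the history server starts, so jobs can resolve mapreduce.application.framework.path. A minimal sketch of what the call is expected to do, assuming it reads the mr_tar_source / mr_tar_destination_folder properties added to cluster-env.xml below and runs the copy as the HDFS superuser (simplified illustration, not the actual library code):

    # Sketch only; the real helper also resolves the {{ ... }} placeholders in the
    # source path and skips the upload when the tarball already exists in HDFS.
    from resource_management import *

    def copy_mr_tarball_sketch(params):
        source = params.config['configurations']['cluster-env']['mr_tar_source']
        destination = params.config['configurations']['cluster-env']['mr_tar_destination_folder']
        # Create the destination folder and upload the tarball as the HDFS user.
        Execute(format("hadoop fs -mkdir -p {destination}; hadoop fs -put {source} {destination}"),
                user=params.hdfs_user)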

+ 3 - 4
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/scripts/params.py

@@ -21,6 +21,7 @@ Ambari Agent
 import os

 from resource_management import *
+from resource_management.libraries.functions.version import compare_versions, format_hdp_stack_version
 import status_params

 # server configurations
@@ -28,10 +29,10 @@ config = Script.get_config()
 tmp_dir = Script.get_tmp_dir()

 hdp_stack_version = str(config['hostLevelParams']['stack_version'])
-stack_is_hdp22_or_further = not (hdp_stack_version.startswith('2.0') or hdp_stack_version.startswith('2.1'))
+hdp_stack_version = format_hdp_stack_version(hdp_stack_version)

 #hadoop params
-if stack_is_hdp22_or_further:
+if compare_versions(hdp_stack_version, "2.2.0.0") >= 0:
   hadoop_libexec_dir = "/usr/hdp/current/hadoop-client/libexec"
   hadoop_bin = "/usr/hdp/current/hadoop-client/sbin"
   hadoop_bin_dir = "/usr/hdp/current/hadoop-client/bin"
@@ -149,9 +150,7 @@ jobhistory_heapsize = default("/configurations/mapred-env/jobhistory_heapsize",
 #for create_hdfs_directory
 hostname = config["hostname"]
 hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
-hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
 hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
-kinit_path_local = functions.get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
 import functools
 #create partial functions with common arguments for every HdfsDirectory call
 #to create hdfs directory we need to call params.HdfsDirectory in code

+ 12 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/configuration/cluster-env.xml

@@ -90,4 +90,16 @@
     <description>Destination HDFS folder for the file.</description>
   </property>

+  <!-- MapReduce2 tarball -->
+  <property>
+    <name>mr_tar_source</name>
+    <value>/usr/hdp/current/hadoop-client/mr-{{ component_version }}.{{ hdp_stack_version }}.tar.gz</value>
+    <description>Source file path that uses dynamic variables and regex to copy the file to HDFS.</description>
+  </property>
+  <property>
+    <name>mr_tar_destination_folder</name>
+    <value>hdfs:///hdp/apps/{{ hdp_stack_version }}/mr/</value>
+    <description>Destination HDFS folder for the file.</description>
+  </property>
+
 </configuration>
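
These two properties deliberately keep the {{ component_version }} and {{ hdp_stack_version }} placeholders in the stored value. A rough sketch of how they might resolve at runtime, using hypothetical version strings (the real substitution is performed by the dynamic_variable_interpretation helper, which also matches the source file by regex):

    # Hypothetical values, for illustration only.
    component_version = "2.6.0"
    hdp_stack_version = "2.2.0.0-2041"

    mr_tar_source = "/usr/hdp/current/hadoop-client/mr-{{ component_version }}.{{ hdp_stack_version }}.tar.gz"
    mr_tar_destination_folder = "hdfs:///hdp/apps/{{ hdp_stack_version }}/mr/"

    resolved_source = (mr_tar_source
                       .replace("{{ component_version }}", component_version)
                       .replace("{{ hdp_stack_version }}", hdp_stack_version))
    resolved_destination = mr_tar_destination_folder.replace("{{ hdp_stack_version }}", hdp_stack_version)

    print(resolved_source)       # /usr/hdp/current/hadoop-client/mr-2.6.0.2.2.0.0-2041.tar.gz
    print(resolved_destination)  # hdfs:///hdp/apps/2.2.0.0-2041/mr/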

+ 2 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/HDFS/configuration/hadoop-env.xml

@@ -135,6 +135,8 @@ export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}}

 # Mostly required for hadoop 2.0
 export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:/usr/hdp/current/hadoop-client/lib/native/Linux-amd64-64
+
+export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
     </value>
   </property>
  

+ 46 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-env.xml

@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+
+  
+  <!-- mapred-env.sh -->
+  <property>
+    <name>content</name>
+    <description>This is the jinja template for mapred-env.sh file</description>
+    <value>
+# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+
+export HADOOP_JOB_HISTORYSERVER_HEAPSIZE={{jobhistory_heapsize}}
+
+export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+
+#export HADOOP_JOB_HISTORYSERVER_OPTS=
+#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
+#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
+#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
+#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
+#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
+export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
+    </value>
+  </property>
+</configuration>

+ 26 - 2
ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml

@@ -24,7 +24,7 @@

   <property>
     <name>mapreduce.admin.user.env</name>
-    <value>LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native:/usr/hdp/current/hadoop-client/lib/native/Linux-amd64-64</value>
+    <value>LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64</value>
     <description>
       Additional execution environment entries for map and reduce task processes.
       This is not an additive property. You must preserve the original value if
@@ -34,12 +34,36 @@

   <property>
     <name>mapreduce.application.classpath</name>
-    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,/usr/hdp/current/hadoop-mapreduce-client/,/usr/hdp/current/hadoop-mapreduce-client/lib,/usr/hdp/current/hadoop-client/</value>
+    <value>$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/common/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/yarn/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/hdfs/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.jar</value>
     <description>
       CLASSPATH for MR applications. A comma-separated list of CLASSPATH
       entries.
     </description>
   </property>

+  <property>
+    <name>mapreduce.application.framework.path</name>
+    <value>hdfs:///hdp/apps/${hdp.version}/mr/mr-2.6.0.${hdp.version}.tar.gz#mr-framework</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>yarn.app.mapreduce.am.admin-command-opts</name>
+    <value>-Xmx256m -Dhdp.version=${hdp.version}</value>
+    <description>
+      Java opts for the MR App Master processes.
+      The following symbol, if present, will be interpolated: @taskid@ is replaced
+      by current TaskID. Any other occurrences of '@' will go unchanged.
+      For example, to enable verbose gc logging to a file named for the taskid in
+      /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+      -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+
+      Usage of -Djava.library.path can cause programs to no longer function if
+      hadoop native libraries are used. These values should instead be set as part
+      of LD_LIBRARY_PATH in the map / reduce JVM env using the mapreduce.map.env and
+      mapreduce.reduce.env config settings.
+    </description>
+  </property>
+

 </configuration>
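
This is the core of the change: mapred-site.xml can now stay static across HDP builds, because Hadoop substitutes ${hdp.version} at runtime and the classpath entries under $PWD/mr-framework point into the tarball localized from mapreduce.application.framework.path. A quick illustration of the substitution with a hypothetical version string:

    # Hypothetical HDP build version, used only to show the expansion.
    hdp_version = "2.2.0.0-2041"

    framework_path = "hdfs:///hdp/apps/${hdp.version}/mr/mr-2.6.0.${hdp.version}.tar.gz#mr-framework"
    am_admin_opts = "-Xmx256m -Dhdp.version=${hdp.version}"

    print(framework_path.replace("${hdp.version}", hdp_version))
    # hdfs:///hdp/apps/2.2.0.0-2041/mr/mr-2.6.0.2.2.0.0-2041.tar.gz#mr-framework
    print(am_admin_opts.replace("${hdp.version}", hdp_version))
    # -Xmx256m -Dhdp.version=2.2.0.0-2041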

+ 21 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py

@@ -23,6 +23,7 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
     parentRecommendConfDict = super(HDP22StackAdvisor, self).getServiceConfigurationRecommenderDict()
     childRecommendConfDict = {
       "HDFS": self.recommendHDFSConfigurations,
+      "MAPREDUCE2": self.recommendMapReduce2Configurations,
       "TEZ": self.recommendTezConfigurations
     }
     parentRecommendConfDict.update(childRecommendConfDict)
@@ -46,6 +47,7 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
     parentValidators = super(HDP22StackAdvisor, self).getServiceConfigurationValidators()
     childValidators = {
       "HDFS": ["hdfs-site", self.validateHDFSConfigurations],
+      "MAPREDUCE2": ["mapred-site", self.validateMapReduce2Configurations],
       "TEZ": ["tez-site", self.validateTezConfigurations]
     }
     parentValidators.update(childValidators)
@@ -58,6 +60,25 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
                         {"config-name": 'tez.runtime.unordered.output.buffer.size-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'tez.runtime.unordered.output.buffer.size-mb')},]
                         {"config-name": 'tez.runtime.unordered.output.buffer.size-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'tez.runtime.unordered.output.buffer.size-mb')},]
     return self.toConfigurationValidationProblems(validationItems, "tez-site")
     return self.toConfigurationValidationProblems(validationItems, "tez-site")
 
 
+  def recommendMapReduce2Configurations(self, configurations, clusterData):
+    putMapredProperty = self.putProperty(configurations, "mapred-site")
+    putMapredProperty('yarn.app.mapreduce.am.resource.mb', int(clusterData['amMemory']))
+    putMapredProperty('yarn.app.mapreduce.am.command-opts', "-Xmx" + str(int(round(0.8 * clusterData['amMemory']))) + "m" + " -Dhdp.version=${hdp.version}")
+    putMapredProperty('mapreduce.map.memory.mb', clusterData['mapMemory'])
+    putMapredProperty('mapreduce.reduce.memory.mb', int(clusterData['reduceMemory']))
+    putMapredProperty('mapreduce.map.java.opts', "-Xmx" + str(int(round(0.8 * clusterData['mapMemory']))) + "m")
+    putMapredProperty('mapreduce.reduce.java.opts', "-Xmx" + str(int(round(0.8 * clusterData['reduceMemory']))) + "m")
+    putMapredProperty('mapreduce.task.io.sort.mb', min(int(round(0.4 * clusterData['mapMemory'])), 1024))
+
+  def validateMapReduce2Configurations(self, properties, recommendedDefaults, configurations):
+    validationItems = [ {"config-name": 'mapreduce.map.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'mapreduce.map.java.opts')},
+                        {"config-name": 'mapreduce.reduce.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'mapreduce.reduce.java.opts')},
+                        {"config-name": 'mapreduce.task.io.sort.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.task.io.sort.mb')},
+                        {"config-name": 'mapreduce.map.memory.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.map.memory.mb')},
+                        {"config-name": 'mapreduce.reduce.memory.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.reduce.memory.mb')},
+                        {"config-name": 'yarn.app.mapreduce.am.resource.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.app.mapreduce.am.resource.mb')}]
+    return self.toConfigurationValidationProblems(validationItems, "mapred-site")
+
   def validateHDFSConfigurations(self, properties, recommendedDefaults, configurations):
     # We can not access property hadoop.security.authentication from the
     # other config (core-site). That's why we are using another heuristics here
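
To make the new recommendation arithmetic concrete, a small worked example with hypothetical clusterData values (not taken from a real cluster):

    # Hypothetical sizing inputs supplied by the stack advisor framework.
    clusterData = {'amMemory': 512, 'mapMemory': 1024, 'reduceMemory': 2048}

    am_opts = "-Xmx" + str(int(round(0.8 * clusterData['amMemory']))) + "m" + " -Dhdp.version=${hdp.version}"
    map_opts = "-Xmx" + str(int(round(0.8 * clusterData['mapMemory']))) + "m"
    sort_mb = min(int(round(0.4 * clusterData['mapMemory'])), 1024)

    print(am_opts)   # -Xmx410m -Dhdp.version=${hdp.version}
    print(map_opts)  # -Xmx819m
    print(sort_mb)   # 410

Note that yarn.app.mapreduce.am.command-opts keeps the -Dhdp.version=${hdp.version} suffix, matching the UpgradeHelper_HDP2.py change above.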

+ 26 - 0
ambari-web/app/data/HDP2/site_properties.js

@@ -2056,6 +2056,32 @@ module.exports =
       "serviceName": "MISC",
       "serviceName": "MISC",
       "filename": "cluster-env.xml"
       "filename": "cluster-env.xml"
     },
     },
+    {
+      "id": "puppet var",
+      "name": "mr_tar_source",
+      "displayName": "MapReduce2 tarball source",
+      "description": "Source file path that uses dynamic variables and regex to copy the file to HDFS.",
+      "defaultValue": '',
+      "isRequired": true,
+      "isOverridable": false,
+      "isVisible": false,
+      "isEditable": false,
+      "serviceName": "MISC",
+      "filename": "cluster-env.xml"
+    },
+    {
+      "id": "puppet var",
+      "name": "mr_tar_destination_folder",
+      "displayName": "MapReduce2 tarball destination folder",
+      "description": "Destination HDFS folder for the file.",
+      "defaultValue": '',
+      "isRequired": true,
+      "isOverridable": false,
+      "isVisible": false,
+      "isEditable": false,
+      "serviceName": "MISC",
+      "filename": "cluster-env.xml"
+    },
     {
       "id": "puppet var",
       "name": "tez_tar_source",