
AMBARI-7981 Use static mapred-site.xml file for MR by using ${hdp.version} variable (dsen)

Dmytro Sen, 10 years ago
parent revision af0253506f
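
The mechanism behind the change: Hadoop's Configuration class expands ${...} tokens in property values against JVM system properties, so a single static mapred-site.xml can refer to versioned install paths as long as every relevant JVM is started with -Dhdp.version=<real version> — which is exactly what the HADOOP_OPTS and command-opts edits below arrange. A minimal Python sketch of that expansion semantics (illustrative only, not Hadoop's implementation; the build number is hypothetical):

import re

def expand(value, system_properties):
    # Replace each ${name} token with the matching system property;
    # unresolved tokens are left untouched, as Hadoop's Configuration does.
    return re.sub(r'\$\{([^}]+)\}',
                  lambda m: system_properties.get(m.group(1), m.group(0)),
                  value)

# With the JVM launched as: java -Dhdp.version=2.2.0.0-2041 ...
props = {'hdp.version': '2.2.0.0-2041'}
print(expand('/usr/hdp/${hdp.version}/hadoop/lib/native', props))
# -> /usr/hdp/2.2.0.0-2041/hadoop/lib/native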

+ 1 - 1
ambari-server/src/main/python/UpgradeHelper_HDP2.py

@@ -455,7 +455,7 @@ MAPRED_SITE = {
   "mapreduce.tasktracker.taskmemorymanager.monitoringinterval": "DELETE_OLD",
   "mapreduce.tasktracker.tasks.sleeptimebeforesigkill": "DELETE_OLD",
   "yarn.app.mapreduce.am.admin-command-opts": "-Djava.net.preferIPv4Stack=true -Dhadoop.metrics.log.level=WARN",
-  "yarn.app.mapreduce.am.command-opts": "-Xmx312m",
+  "yarn.app.mapreduce.am.command-opts": "-Xmx312m -Dhdp.version=${hdp.version}",
   "yarn.app.mapreduce.am.log.level": "INFO",
   "yarn.app.mapreduce.am.resource.mb": "512",
   "yarn.app.mapreduce.am.staging-dir": "/user"

+ 2 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/scripts/historyserver.py

@@ -20,6 +20,7 @@ Ambari Agent
 """
 import sys
 from resource_management import *
+from resource_management.libraries.functions.dynamic_variable_interpretation import copy_tarballs_to_hdfs
 
 from yarn import yarn
 from service import service
@@ -37,6 +38,7 @@ class HistoryServer(Script):
     import params
     env.set_params(params)
     self.configure(env) # FOR SECURITY
+    copy_tarballs_to_hdfs('mr', params.mapred_user, params.hdfs_user)
     service('historyserver', action='start', serviceName='mapreduce')
 
   def stop(self, env):
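
copy_tarballs_to_hdfs comes from Ambari's resource_management library; conceptually it resolves the mr_tar_source / mr_tar_destination_folder pair from cluster-env (added further below), fills in the version placeholders, and uploads the tarball on HistoryServer start if it is not already in HDFS. A rough sketch of that flow under those assumptions (simplified names, not the library's actual code):

def copy_tarball_sketch(source, destination, owner, hdfs_user, run):
    # 'run' is assumed to execute a shell command as the given user
    # and return its exit code.
    if run("hadoop fs -ls %s" % destination, user=hdfs_user) == 0:
        return  # tarball already present for this stack version
    run("hadoop fs -mkdir -p %s" % destination.rsplit('/', 1)[0], user=hdfs_user)
    run("hadoop fs -copyFromLocal %s %s" % (source, destination), user=owner)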

+ 3 - 4
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/scripts/params.py

@@ -21,6 +21,7 @@ Ambari Agent
 import os
 
 from resource_management import *
+from resource_management.libraries.functions.version import compare_versions, format_hdp_stack_version
 import status_params
 
 # server configurations
@@ -28,10 +29,10 @@ config = Script.get_config()
 tmp_dir = Script.get_tmp_dir()
 
 hdp_stack_version = str(config['hostLevelParams']['stack_version'])
-stack_is_hdp22_or_further = not (hdp_stack_version.startswith('2.0') or hdp_stack_version.startswith('2.1'))
+hdp_stack_version = format_hdp_stack_version(hdp_stack_version)
 
 #hadoop params
-if stack_is_hdp22_or_further:
+if compare_versions(hdp_stack_version, "2.2.0.0") >= 0:
   hadoop_libexec_dir = "/usr/hdp/current/hadoop-client/libexec"
   hadoop_bin = "/usr/hdp/current/hadoop-client/sbin"
   hadoop_bin_dir = "/usr/hdp/current/hadoop-client/bin"
@@ -149,9 +150,7 @@ jobhistory_heapsize = default("/configurations/mapred-env/jobhistory_heapsize",
 #for create_hdfs_directory
 hostname = config["hostname"]
 hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
-hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
 hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
-kinit_path_local = functions.get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
 import functools
 #create partial functions with common arguments for every HdfsDirectory call
 #to create hdfs directory we need to call params.HdfsDirectory in code
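
The replaced startswith() test was also fragile: a future "2.10" stack would match startswith('2.1'). compare_versions does a numeric, segment-by-segment comparison instead, after format_hdp_stack_version normalizes the raw stack_version string. A self-contained sketch of the comparison semantics (not the library's code; assumes purely numeric segments):

def compare_versions_sketch(v1, v2):
    # Compare dotted version strings numerically, returning -1, 0, or 1;
    # shorter versions are zero-padded before comparison.
    a = [int(x) for x in v1.split('.')]
    b = [int(x) for x in v2.split('.')]
    n = max(len(a), len(b))
    a += [0] * (n - len(a))
    b += [0] * (n - len(b))
    return (a > b) - (a < b)

assert compare_versions_sketch("2.2.0.0", "2.2.0.0") == 0
assert compare_versions_sketch("2.1.7", "2.2.0.0") < 0
assert compare_versions_sketch("2.10", "2.2") > 0  # startswith('2.1') would misclassify this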

+ 12 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/configuration/cluster-env.xml

@@ -90,4 +90,16 @@
     <description>Destination HDFS folder for the file.</description>
   </property>
 
+  <!-- MapReduce2 tarball -->
+  <property>
+    <name>mr_tar_source</name>
+    <value>/usr/hdp/current/hadoop-client/mr-{{ component_version }}.{{ hdp_stack_version }}.tar.gz</value>
+    <description>Source file path that uses dynamic variables and regex to copy the file to HDFS.</description>
+  </property>
+  <property>
+    <name>mr_tar_destination_folder</name>
+    <value>hdfs:///hdp/apps/{{ hdp_stack_version }}/mr/</value>
+    <description>Destination HDFS folder for the file.</description>
+  </property>
+
 </configuration>
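
These two properties are what copy_tarballs_to_hdfs consumes; it fills the {{ ... }} placeholders at runtime. A sketch of the substitution, with hypothetical version values:

import re

def resolve(template, values):
    # Fill {{ name }} placeholders, tolerating whitespace inside the braces.
    return re.sub(r'\{\{\s*(\w+)\s*\}\}', lambda m: values[m.group(1)], template)

values = {'component_version': '2.6.0', 'hdp_stack_version': '2.2.0.0-2041'}  # hypothetical
print(resolve('/usr/hdp/current/hadoop-client/mr-{{ component_version }}.{{ hdp_stack_version }}.tar.gz', values))
# -> /usr/hdp/current/hadoop-client/mr-2.6.0.2.2.0.0-2041.tar.gz
print(resolve('hdfs:///hdp/apps/{{ hdp_stack_version }}/mr/', values))
# -> hdfs:///hdp/apps/2.2.0.0-2041/mr/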

+ 2 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/HDFS/configuration/hadoop-env.xml

@@ -135,6 +135,8 @@ export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}}
 
 # Mostly required for hadoop 2.0
 export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:/usr/hdp/current/hadoop-client/lib/native/Linux-amd64-64
+
+export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
     </value>
   </property>
   

+ 46 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-env.xml

@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+
+  
+  <!-- mapred-env.sh -->
+  <property>
+    <name>content</name>
+    <description>This is the jinja template for mapred-env.sh file</description>
+    <value>
+# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+
+export HADOOP_JOB_HISTORYSERVER_HEAPSIZE={{jobhistory_heapsize}}
+
+export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+
+#export HADOOP_JOB_HISTORYSERVER_OPTS=
+#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
+#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
+#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
+#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
+#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
+export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
+    </value>
+  </property>
+</configuration>

+ 26 - 2
ambari-server/src/main/resources/stacks/HDP/2.2/services/YARN/configuration-mapred/mapred-site.xml

@@ -24,7 +24,7 @@
 
   <property>
     <name>mapreduce.admin.user.env</name>
-    <value>LD_LIBRARY_PATH=/usr/lib/hadoop/lib/native:/usr/hdp/current/hadoop-client/lib/native/Linux-amd64-64</value>
+    <value>LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64</value>
     <description>
       Additional execution environment entries for map and reduce task processes.
       This is not an additive property. You must preserve the original value if
@@ -34,12 +34,36 @@
 
   <property>
     <name>mapreduce.application.classpath</name>
-    <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*,/usr/hdp/current/hadoop-mapreduce-client/,/usr/hdp/current/hadoop-mapreduce-client/lib,/usr/hdp/current/hadoop-client/</value>
+    <value>$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/common/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/yarn/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/hdfs/lib/*:/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.jar</value>
     <description>
       CLASSPATH for MR applications. A comma-separated list of CLASSPATH
       entries.
     </description>
   </property>
 
+  <property>
+    <name>mapreduce.application.framework.path</name>
+    <value>hdfs:///hdp/apps/${hdp.version}/mr/mr-2.6.0.${hdp.version}.tar.gz#mr-framework</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>yarn.app.mapreduce.am.admin-command-opts</name>
+    <value>-Xmx256m -Dhdp.version=${hdp.version}</value>
+    <description>
+      Java opts for the MR App Master processes.
+      The following symbol, if present, will be interpolated: @taskid@ is replaced
+      by current TaskID. Any other occurrences of '@' will go unchanged.
+      For example, to enable verbose gc logging to a file named for the taskid in
+      /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+      -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+
+      Usage of -Djava.library.path can cause programs to no longer function if
+      hadoop native libraries are used. These values should instead be set as part
+      of LD_LIBRARY_PATH in the map / reduce JVM env using the mapreduce.map.env and
+      mapreduce.reduce.env config settings.
+    </description>
+  </property>
+
 
 </configuration>
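
The #mr-framework suffix in mapreduce.application.framework.path is the YARN distributed-cache link name: the archive is localized into each container's working directory and symlinked under that name, which is why every entry in the new mapreduce.application.classpath starts with $PWD/mr-framework/. A small sketch of how the two properties line up:

framework_path = "hdfs:///hdp/apps/${hdp.version}/mr/mr-2.6.0.${hdp.version}.tar.gz#mr-framework"

# Split the HDFS URI from the distributed-cache link name.
archive_uri, link_name = framework_path.split('#')

# YARN localizes archive_uri and symlinks it as $PWD/<link_name> inside the
# container, so the classpath entries resolve beneath that symlink:
print("$PWD/%s/hadoop-2.6.0.${hdp.version}/share/hadoop/mapreduce/*" % link_name)
# -> $PWD/mr-framework/hadoop-2.6.0.${hdp.version}/share/hadoop/mapreduce/*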

+ 21 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py

@@ -23,6 +23,7 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
     parentRecommendConfDict = super(HDP22StackAdvisor, self).getServiceConfigurationRecommenderDict()
     childRecommendConfDict = {
       "HDFS": self.recommendHDFSConfigurations,
+      "MAPREDUCE2": self.recommendMapReduce2Configurations,
       "TEZ": self.recommendTezConfigurations
     }
     parentRecommendConfDict.update(childRecommendConfDict)
@@ -46,6 +47,7 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
     parentValidators = super(HDP22StackAdvisor, self).getServiceConfigurationValidators()
     childValidators = {
       "HDFS": ["hdfs-site", self.validateHDFSConfigurations],
+      "MAPREDUCE2": ["mapred-site", self.validateMapReduce2Configurations],
       "TEZ": ["tez-site", self.validateTezConfigurations]
     }
     parentValidators.update(childValidators)
@@ -58,6 +60,25 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
                         {"config-name": 'tez.runtime.unordered.output.buffer.size-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'tez.runtime.unordered.output.buffer.size-mb')},]
     return self.toConfigurationValidationProblems(validationItems, "tez-site")
 
+  def recommendMapReduce2Configurations(self, configurations, clusterData):
+    putMapredProperty = self.putProperty(configurations, "mapred-site")
+    putMapredProperty('yarn.app.mapreduce.am.resource.mb', int(clusterData['amMemory']))
+    putMapredProperty('yarn.app.mapreduce.am.command-opts', "-Xmx" + str(int(round(0.8 * clusterData['amMemory']))) + "m" + " -Dhdp.version=${hdp.version}")
+    putMapredProperty('mapreduce.map.memory.mb', clusterData['mapMemory'])
+    putMapredProperty('mapreduce.reduce.memory.mb', int(clusterData['reduceMemory']))
+    putMapredProperty('mapreduce.map.java.opts', "-Xmx" + str(int(round(0.8 * clusterData['mapMemory']))) + "m")
+    putMapredProperty('mapreduce.reduce.java.opts', "-Xmx" + str(int(round(0.8 * clusterData['reduceMemory']))) + "m")
+    putMapredProperty('mapreduce.task.io.sort.mb', min(int(round(0.4 * clusterData['mapMemory'])), 1024))
+
+  def validateMapReduce2Configurations(self, properties, recommendedDefaults, configurations):
+    validationItems = [ {"config-name": 'mapreduce.map.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'mapreduce.map.java.opts')},
+                        {"config-name": 'mapreduce.reduce.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'mapreduce.reduce.java.opts')},
+                        {"config-name": 'mapreduce.task.io.sort.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.task.io.sort.mb')},
+                        {"config-name": 'mapreduce.map.memory.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.map.memory.mb')},
+                        {"config-name": 'mapreduce.reduce.memory.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.reduce.memory.mb')},
+                        {"config-name": 'yarn.app.mapreduce.am.resource.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.app.mapreduce.am.resource.mb')}]
+    return self.toConfigurationValidationProblems(validationItems, "mapred-site")
+
   def validateHDFSConfigurations(self, properties, recommendedDefaults, configurations):
     # We can not access property hadoop.security.authentication from the
     # other config (core-site). That's why we are using another heuristics here
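
To see what the new recommender produces, here is the same arithmetic traced with hypothetical clusterData inputs; the 0.8 heap-to-container ratio leaves roughly 20% headroom for non-heap JVM overhead, and mapreduce.task.io.sort.mb is capped at 1024 MB regardless of map memory:

clusterData = {'amMemory': 1024, 'mapMemory': 1536, 'reduceMemory': 2048}  # MB, hypothetical

recommended = {
    'yarn.app.mapreduce.am.resource.mb': int(clusterData['amMemory']),  # 1024
    'yarn.app.mapreduce.am.command-opts':
        "-Xmx" + str(int(round(0.8 * clusterData['amMemory']))) + "m"
        + " -Dhdp.version=${hdp.version}",                                  # -Xmx819m -Dhdp.version=${hdp.version}
    'mapreduce.map.java.opts':
        "-Xmx" + str(int(round(0.8 * clusterData['mapMemory']))) + "m",     # -Xmx1229m
    'mapreduce.reduce.java.opts':
        "-Xmx" + str(int(round(0.8 * clusterData['reduceMemory']))) + "m",  # -Xmx1638m
    'mapreduce.task.io.sort.mb': min(int(round(0.4 * clusterData['mapMemory'])), 1024),  # 614
}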

+ 26 - 0
ambari-web/app/data/HDP2/site_properties.js

@@ -2056,6 +2056,32 @@ module.exports =
       "serviceName": "MISC",
       "filename": "cluster-env.xml"
     },
+    {
+      "id": "puppet var",
+      "name": "mr_tar_source",
+      "displayName": "MapReduce2 tarball source",
+      "description": "Source file path that uses dynamic variables and regex to copy the file to HDFS.",
+      "defaultValue": '',
+      "isRequired": true,
+      "isOverridable": false,
+      "isVisible": false,
+      "isEditable": false,
+      "serviceName": "MISC",
+      "filename": "cluster-env.xml"
+    },
+    {
+      "id": "puppet var",
+      "name": "mr_tar_destination_folder",
+      "displayName": "MapReduce2 tarball destination folder",
+      "description": "Destination HDFS folder for the file.",
+      "defaultValue": '',
+      "isRequired": true,
+      "isOverridable": false,
+      "isVisible": false,
+      "isEditable": false,
+      "serviceName": "MISC",
+      "filename": "cluster-env.xml"
+    },
     {
       "id": "puppet var",
       "name": "tez_tar_source",