@@ -0,0 +1,573 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
+
+<!-- i/o properties -->
+
+  <property>
+    <name>io.sort.mb</name>
+    <value>200</value>
+    <description>
+      The total amount of Map-side buffer memory to use while sorting files
+    </description>
+  </property>
+
+  <property>
+    <name>io.sort.record.percent</name>
+    <value>.2</value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>io.sort.spill.percent</name>
+    <value>0.9</value>
+    <description>The soft limit on sort buffer usage, as a fraction, at which
+      the background spill to disk begins.</description>
+  </property>
+
+  <property>
+    <name>io.sort.factor</name>
+    <value>100</value>
+    <description>No description</description>
+  </property>
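+
+  <!-- Sketch of how the sort settings above interact, assuming stock Hadoop 1.x
+       semantics rather than anything specific to this cluster: each map task gets an
+       io.sort.mb (200 MB) in-memory sort buffer, of which io.sort.record.percent (0.2)
+       is reserved for record-boundary accounting; once usage crosses
+       io.sort.spill.percent (0.9) a background spill to disk starts; io.sort.factor
+       (100) bounds how many spill segments are merged in a single pass. -->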
+
+<!-- map/reduce properties -->
+
+  <property>
+    <name>mapred.tasktracker.tasks.sleeptime-before-sigkill</name>
+    <value>250</value>
+    <description>Normally this is the amount of time to wait before killing
+      processes, and the recommended default is 5 seconds (a value of 5000 here).
+      In this case it is used solely to signal tasks before killing them, and to
+      kill them very quickly (1/4 second) to guarantee that we do not leave VMs
+      around for later jobs.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker.handler.count</name>
+    <value>50</value>
+    <description>
+      The number of server threads for the JobTracker. This should be roughly
+      4% of the number of tasktracker nodes.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.system.dir</name>
+    <value>/mapred/system</value>
+    <description>Path on HDFS where the MapReduce framework stores system files.</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker</name>
+    <!-- cluster variant -->
+    <value>localhost:50300</value>
+    <description>JobTracker address</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker.http.address</name>
+    <!-- cluster variant -->
+    <value>localhost:50030</value>
+    <description>JobTracker host and http port address</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <!-- cluster specific -->
+    <name>mapred.local.dir</name>
+    <value>/hadoop/mapred</value>
+    <description>No description</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapreduce.cluster.administrators</name>
+    <value> hadoop</value>
+  </property>
+
+  <property>
+    <name>mapred.reduce.parallel.copies</name>
+    <value>30</value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>mapred.tasktracker.map.tasks.maximum</name>
+    <value>4</value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>mapred.tasktracker.reduce.tasks.maximum</name>
+    <value>2</value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>tasktracker.http.threads</name>
+    <value>50</value>
+  </property>
+
+  <property>
+    <name>mapred.map.tasks.speculative.execution</name>
+    <value>false</value>
+    <description>If true, then multiple instances of some map tasks
+      may be executed in parallel.</description>
+  </property>
+
+  <property>
+    <name>mapred.reduce.tasks.speculative.execution</name>
+    <value>false</value>
+    <description>If true, then multiple instances of some reduce tasks
+      may be executed in parallel.</description>
+  </property>
+
+  <property>
+    <name>mapred.reduce.slowstart.completed.maps</name>
+    <value>0.05</value>
+  </property>
+
+  <property>
+    <name>mapred.inmem.merge.threshold</name>
+    <value>1000</value>
+    <description>The threshold, in terms of the number of files, for the
+      in-memory merge process. When this many files accumulate, the in-memory
+      merge is initiated and spilled to disk. A value of 0 or less means there
+      is no threshold and the merge depends only on the ramfs's memory
+      consumption to trigger.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.shuffle.merge.percent</name>
+    <value>0.66</value>
+    <description>The usage threshold at which an in-memory merge will be
+      initiated, expressed as a percentage of the total memory allocated to
+      storing in-memory map outputs, as defined by
+      mapred.job.shuffle.input.buffer.percent.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.shuffle.input.buffer.percent</name>
+    <value>0.7</value>
+    <description>The percentage of memory to be allocated from the maximum heap
+      size to storing map outputs during the shuffle.
+    </description>
+  </property>
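+
+  <!-- Rough sketch of how the reduce-side merge triggers above combine, assuming
+       stock Hadoop 1.x shuffle behaviour: the shuffle may hold map outputs in up to
+       0.7 of the reducer heap (mapred.job.shuffle.input.buffer.percent); an in-memory
+       merge starts when that buffer is 66% full (mapred.job.shuffle.merge.percent) or
+       when 1000 in-memory segments have accumulated (mapred.inmem.merge.threshold),
+       whichever happens first. -->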
+
+  <property>
+    <name>mapred.map.output.compression.codec</name>
+    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+    <description>If the map outputs are compressed, how should they be
+      compressed
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.output.compression.type</name>
+    <value>BLOCK</value>
+    <description>If the job outputs are to be compressed as SequenceFiles, how
+      should they be compressed? Should be one of NONE, RECORD or BLOCK.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.completeuserjobs.maximum</name>
+    <value>5</value>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.taskScheduler</name>
+    <value>org.apache.hadoop.mapred.CapacityTaskScheduler</value>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.restart.recover</name>
+    <value>false</value>
+    <description>"true" to enable (job) recovery upon restart,
+      "false" to start afresh
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.reduce.input.buffer.percent</name>
+    <value>0.0</value>
+    <description>The percentage of memory, relative to the maximum heap size, to
+      retain map outputs during the reduce. When the shuffle is concluded, any
+      remaining map outputs in memory must consume less than this threshold before
+      the reduce can begin.
+    </description>
+  </property>
+
+  <property>
+    <name>mapreduce.reduce.input.limit</name>
+    <value>10737418240</value>
+    <description>The limit on the input size of the reduce. (This value
+      is 10 GB.) If the estimated input size of the reduce is greater than
+      this value, the job is failed. A value of -1 means that there is no
+      limit set.</description>
+  </property>
+
+  <!-- copied from kryptonite configuration -->
+  <property>
+    <name>mapred.compress.map.output</name>
+    <value></value>
+  </property>
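+
+  <!-- Note (assumption based on stock Hadoop 1.x defaults): with an empty value here,
+       mapred.compress.map.output falls back to the framework default (false), so the
+       SnappyCodec configured in mapred.map.output.compression.codec above only takes
+       effect if this flag is explicitly set to true. -->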
+
+  <property>
+    <name>mapred.task.timeout</name>
+    <value>600000</value>
+    <description>The number of milliseconds before a task will be
+      terminated if it neither reads an input, writes an output, nor
+      updates its status string.
+    </description>
+  </property>
+
+  <property>
+    <name>jetty.connector</name>
+    <value>org.mortbay.jetty.nio.SelectChannelConnector</value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>mapred.task.tracker.task-controller</name>
+    <value>org.apache.hadoop.mapred.DefaultTaskController</value>
+    <description>
+      TaskController which is used to launch and manage task execution.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.child.root.logger</name>
+    <value>INFO,TLA</value>
+  </property>
+
+  <property>
+    <name>mapred.child.java.opts</name>
+    <value></value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>mapred.cluster.map.memory.mb</name>
+    <value>1536</value>
+    <description>
+      The virtual memory size of a single Map slot in the MapReduce framework
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.cluster.reduce.memory.mb</name>
+    <value>2048</value>
+    <description>
+      The virtual memory size of a single Reduce slot in the MapReduce framework
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.map.memory.mb</name>
+    <value>1536</value>
+    <description>
+      Virtual memory for a single Map task
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.reduce.memory.mb</name>
+    <value>2048</value>
+    <description>
+      Virtual memory for a single Reduce task
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.cluster.max.map.memory.mb</name>
+    <value>6144</value>
+    <description>
+      Upper limit on virtual memory size for a single Map task of any MapReduce job
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.cluster.max.reduce.memory.mb</name>
+    <value>4096</value>
+    <description>
+      Upper limit on virtual memory size for a single Reduce task of any MapReduce job
+    </description>
+  </property>
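+
+  <!-- Sketch of how the memory settings above relate, assuming standard Hadoop 1.x
+       memory-based scheduling: mapred.cluster.{map,reduce}.memory.mb define the size
+       of one slot; a job's per-task requests in mapred.job.{map,reduce}.memory.mb are
+       rounded up to whole slots and must not exceed the cluster-wide ceilings in
+       mapred.cluster.max.{map,reduce}.memory.mb, otherwise the job is rejected at
+       submission. -->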
+
+  <property>
+    <name>mapred.hosts</name>
+    <value>/etc/hadoop/conf/mapred.include</value>
+    <description>
+      Names a file that contains the list of nodes that may
+      connect to the jobtracker. If the value is empty, all hosts are
+      permitted.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.hosts.exclude</name>
+    <value>/etc/hadoop/conf/mapred.exclude</value>
+    <description>
+      Names a file that contains the list of hosts that
+      should be excluded by the jobtracker. If the value is empty, no
+      hosts are excluded.
+    </description>
+  </property>
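+
+  <!-- Usage note (assumption, standard Hadoop 1.x administration): after editing
+       mapred.include or mapred.exclude, the JobTracker re-reads them with
+       "hadoop mradmin -refreshNodes"; hosts listed in the exclude file are then
+       decommissioned without a JobTracker restart. -->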
+
+  <property>
+    <name>mapred.max.tracker.blacklists</name>
+    <value>16</value>
+    <description>
+      If a node is reported as blacklisted by 16 successful jobs within the
+      fault timeout window, it will be graylisted.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.healthChecker.script.path</name>
+    <value>file:////mapred/jobstatus</value>
+    <description>
+      Directory path to view job status
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.healthChecker.interval</name>
+    <value>135000</value>
+  </property>
+
+  <property>
+    <name>mapred.healthChecker.script.timeout</name>
+    <value>60000</value>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker.persist.jobstatus.active</name>
+    <value>false</value>
+    <description>Indicates whether persistence of job status information is
+      active or not.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker.persist.jobstatus.hours</name>
+    <value>1</value>
+    <description>The number of hours job status information is persisted in DFS.
+      The job status information will be available after it drops out of the memory
+      queue and between jobtracker restarts. With a zero value the job status
+      information is not persisted at all in DFS.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker.persist.jobstatus.dir</name>
+    <value>/etc/hadoop/conf/health_check</value>
+    <description>The directory where the job status information is persisted
+      in a file system, to be available after it drops out of the memory queue
+      and between jobtracker restarts.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.retirejob.check</name>
+    <value>10000</value>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.retirejob.interval</name>
+    <value>21600000</value>
+  </property>
+
+  <property>
+    <name>mapred.job.tracker.history.completed.location</name>
+    <value>/mapred/history/done</value>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>mapred.task.maxvmem</name>
+    <value></value>
+    <final>true</final>
+    <description>No description</description>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.maxtasks.per.job</name>
+    <value>-1</value>
+    <final>true</final>
+    <description>The maximum number of tasks for a single job.
+      A value of -1 indicates that there is no maximum.</description>
+  </property>
+
+  <property>
+    <name>mapreduce.fileoutputcommitter.marksuccessfuljobs</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>mapred.userlog.retain.hours</name>
+    <value>24</value>
+    <description>
+      The maximum time, in hours, for which the user-logs are to be retained after the job completion.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.job.reuse.jvm.num.tasks</name>
+    <value>1</value>
+    <description>
+      How many tasks to run per JVM. If set to -1, there is no limit.
+    </description>
+    <final>true</final>
+  </property>
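+
+  <!-- Note: under stock Hadoop 1.x semantics for this key (an assumption, not stated
+       in this file), a value of 1 means each JVM runs exactly one task and is then
+       torn down, which matches the intent of the aggressive SIGKILL delay configured
+       in mapred.tasktracker.tasks.sleeptime-before-sigkill above: no JVMs are kept
+       around for later jobs. -->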
+
+  <property>
+    <name>mapreduce.jobtracker.kerberos.principal</name>
+    <value></value>
+    <description>
+      JT user name key.
+    </description>
+  </property>
+
+  <property>
+    <name>mapreduce.tasktracker.kerberos.principal</name>
+    <value></value>
+    <description>
+      TT user name key. "_HOST" is replaced by the host name of the task tracker.
+    </description>
+  </property>
+
+  <property>
+    <name>hadoop.job.history.user.location</name>
+    <value>none</value>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapreduce.jobtracker.keytab.file</name>
+    <value></value>
+    <description>
+      The keytab for the jobtracker principal.
+    </description>
+  </property>
+
+  <property>
+    <name>mapreduce.tasktracker.keytab.file</name>
+    <value></value>
+    <description>The filename of the keytab for the task tracker</description>
+  </property>
+
+  <property>
+    <name>mapred.task.tracker.http.address</name>
+    <value></value>
+    <description>HTTP address for the task tracker.</description>
+  </property>
+
+  <property>
+    <name>mapreduce.jobtracker.staging.root.dir</name>
+    <value>/user</value>
+    <description>The path prefix where the staging directories should be placed.
+      The next level is always the user's name. It is a path in the default
+      file system.</description>
+  </property>
+
+  <property>
+    <name>mapreduce.tasktracker.group</name>
+    <value>hadoop</value>
+    <description>The group that the task controller uses for accessing the task
+      controller. The mapred user must be a member and users should *not* be
+      members.</description>
+  </property>
+
+  <property>
+    <name>mapreduce.jobtracker.split.metainfo.maxsize</name>
+    <value>50000000</value>
+    <final>true</final>
+    <description>If the size of the split metainfo file is larger than this,
+      the JobTracker will fail the job during initialization.
+    </description>
+  </property>
+
+  <property>
+    <name>mapreduce.history.server.embedded</name>
+    <value>false</value>
+    <description>Whether the job history server should be embedded within the
+      JobTracker process.
+    </description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapreduce.history.server.http.address</name>
+    <!-- cluster variant -->
+    <value>localhost:51111</value>
+    <description>Http address of the history server</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapreduce.jobhistory.kerberos.principal</name>
+    <!-- cluster variant -->
+    <value></value>
+    <description>Job history user name key. (must map to same user as JT
+      user)</description>
+  </property>
+
+  <property>
+    <name>mapreduce.jobhistory.keytab.file</name>
+    <!-- cluster variant -->
+    <value></value>
+    <description>The keytab for the job history server principal.</description>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.blacklist.fault-timeout-window</name>
+    <value>180</value>
+    <description>
+      3-hour sliding window (value is in minutes)
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.blacklist.fault-bucket-width</name>
+    <value>15</value>
+    <description>
+      15-minute bucket size (value is in minutes)
+    </description>
+  </property>
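+
+  <!-- Worked example of the blacklist settings, assuming standard Hadoop 1.x fault
+       tracking: faults reported against a tracker are counted in 15-minute buckets
+       (fault-bucket-width) over a 180-minute sliding window (fault-timeout-window);
+       if 16 different successful jobs report the tracker faulty inside that window
+       (mapred.max.tracker.blacklists above), the tracker is graylisted until the
+       faults age out of the window. -->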
+
+  <property>
+    <name>mapred.queue.names</name>
+    <value>default</value>
+    <description>Comma-separated list of queues configured for this jobtracker.</description>
+  </property>
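+
+  <!-- Note (assumption): because mapred.jobtracker.taskScheduler is set to
+       CapacityTaskScheduler above, each queue named here (currently only "default")
+       typically also needs a matching capacity definition in capacity-scheduler.xml,
+       e.g. mapred.capacity-scheduler.queue.default.capacity. -->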
+
+</configuration>