|
@@ -1,537 +0,0 @@
|
|
|
-<?xml version="1.0"?>
|
|
|
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
|
|
-
|
|
|
-<!--
|
|
|
- Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
- contributor license agreements. See the NOTICE file distributed with
|
|
|
- this work for additional information regarding copyright ownership.
|
|
|
- The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
- (the "License"); you may not use this file except in compliance with
|
|
|
- the License. You may obtain a copy of the License at
|
|
|
-
|
|
|
- http://www.apache.org/licenses/LICENSE-2.0
|
|
|
-
|
|
|
- Unless required by applicable law or agreed to in writing, software
|
|
|
- distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
- See the License for the specific language governing permissions and
|
|
|
- limitations under the License.
|
|
|
--->
|
|
|
-
|
|
|
-<!-- Put site-specific property overrides in this file. -->
|
|
|
-
|
|
|
-<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
|
|
|
-
|
|
|
-<!-- i/o properties -->
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>io.sort.mb</name>
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>io.sort.record.percent</name>
|
|
|
- <value>.2</value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>io.sort.spill.percent</name>
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>io.sort.factor</name>
|
|
|
- <value>100</value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
-<!-- map/reduce properties -->
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.tasktracker.tasks.sleeptime-before-sigkill</name>
|
|
|
- <value>250</value>
|
|
|
- <description>Normally, this is the amount of time before killing
|
|
|
- processes, and the recommended-default is 5.000 seconds - a value of
|
|
|
- 5000 here. In this case, we are using it solely to blast tasks before
|
|
|
- killing them, and killing them very quickly (1/4 second) to guarantee
|
|
|
- that we do not leave VMs around for later jobs.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.tracker.handler.count</name>
|
|
|
- <value>50</value>
|
|
|
- <description>
|
|
|
- The number of server threads for the JobTracker. This should be roughly
|
|
|
- 4% of the number of tasktracker nodes.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.system.dir</name>
|
|
|
- <value>/mapred/system</value>
|
|
|
- <description>No description</description>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.tracker</name>
|
|
|
- <!-- cluster variant -->
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.tracker.http.address</name>
|
|
|
- <!-- cluster variant -->
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <!-- cluster specific -->
|
|
|
- <name>mapred.local.dir</name>
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.cluster.administrators</name>
|
|
|
- <value> hadoop</value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.reduce.parallel.copies</name>
|
|
|
- <value>30</value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.tasktracker.map.tasks.maximum</name>
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.tasktracker.reduce.tasks.maximum</name>
|
|
|
- <value></value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>tasktracker.http.threads</name>
|
|
|
- <value>50</value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.map.tasks.speculative.execution</name>
|
|
|
- <value>false</value>
|
|
|
- <description>If true, then multiple instances of some map tasks
|
|
|
- may be executed in parallel.</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.reduce.tasks.speculative.execution</name>
|
|
|
- <value>false</value>
|
|
|
- <description>If true, then multiple instances of some reduce tasks
|
|
|
- may be executed in parallel.</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.reduce.slowstart.completed.maps</name>
|
|
|
- <value>0.05</value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.inmem.merge.threshold</name>
|
|
|
- <value>1000</value>
|
|
|
- <description>The threshold, in terms of the number of files
|
|
|
- for the in-memory merge process. When we accumulate threshold number of files
|
|
|
- we initiate the in-memory merge and spill to disk. A value of 0 or less than
|
|
|
- 0 indicates we want to DON'T have any threshold and instead depend only on
|
|
|
- the ramfs's memory consumption to trigger the merge.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.shuffle.merge.percent</name>
|
|
|
- <value>0.66</value>
|
|
|
- <description>The usage threshold at which an in-memory merge will be
|
|
|
- initiated, expressed as a percentage of the total memory allocated to
|
|
|
- storing in-memory map outputs, as defined by
|
|
|
- mapred.job.shuffle.input.buffer.percent.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.shuffle.input.buffer.percent</name>
|
|
|
- <value>0.7</value>
|
|
|
- <description>The percentage of memory to be allocated from the maximum heap
|
|
|
- size to storing map outputs during the shuffle.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.map.output.compression.codec</name>
|
|
|
- <value></value>
|
|
|
- <description>If the map outputs are compressed, how should they be
|
|
|
- compressed
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.output.compression.type</name>
|
|
|
- <value>BLOCK</value>
|
|
|
- <description>If the job outputs are to compressed as SequenceFiles, how should
|
|
|
- they be compressed? Should be one of NONE, RECORD or BLOCK.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.jobtracker.completeuserjobs.maximum</name>
|
|
|
- <value>5</value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.jobtracker.taskScheduler</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.jobtracker.restart.recover</name>
|
|
|
- <value>false</value>
|
|
|
- <description>"true" to enable (job) recovery upon restart,
|
|
|
- "false" to start afresh
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.reduce.input.buffer.percent</name>
|
|
|
- <value>0.0</value>
|
|
|
- <description>The percentage of memory- relative to the maximum heap size- to
|
|
|
- retain map outputs during the reduce. When the shuffle is concluded, any
|
|
|
- remaining map outputs in memory must consume less than this threshold before
|
|
|
- the reduce can begin.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.reduce.input.limit</name>
|
|
|
- <value>10737418240</value>
|
|
|
- <description>The limit on the input size of the reduce. (This value
|
|
|
- is 10 Gb.) If the estimated input size of the reduce is greater than
|
|
|
- this value, job is failed. A value of -1 means that there is no limit
|
|
|
- set. </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-
|
|
|
- <!-- copied from kryptonite configuration -->
|
|
|
- <property>
|
|
|
- <name>mapred.compress.map.output</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.task.timeout</name>
|
|
|
- <value>600000</value>
|
|
|
- <description>The number of milliseconds before a task will be
|
|
|
- terminated if it neither reads an input, writes an output, nor
|
|
|
- updates its status string.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>jetty.connector</name>
|
|
|
- <value>org.mortbay.jetty.nio.SelectChannelConnector</value>
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.task.tracker.task-controller</name>
|
|
|
- <value></value>
|
|
|
- <description>
|
|
|
- TaskController which is used to launch and manage task execution.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.child.root.logger</name>
|
|
|
- <value>INFO,TLA</value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.child.java.opts</name>
|
|
|
- <value></value>
|
|
|
-
|
|
|
- <description>No description</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.cluster.map.memory.mb</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.cluster.reduce.memory.mb</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.map.memory.mb</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.job.reduce.memory.mb</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.cluster.max.map.memory.mb</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapred.cluster.max.reduce.memory.mb</name>
|
|
|
- <value></value>
|
|
|
- </property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.hosts</name>
|
|
|
- <value></value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.hosts.exclude</name>
|
|
|
- <value></value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.max.tracker.blacklists</name>
|
|
|
- <value>16</value>
|
|
|
- <description>
|
|
|
- if node is reported blacklisted by 16 successful jobs within timeout-window, it will be graylisted
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.healthChecker.script.path</name>
|
|
|
- <value></value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.healthChecker.interval</name>
|
|
|
- <value>135000</value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.healthChecker.script.timeout</name>
|
|
|
- <value>60000</value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.job.tracker.persist.jobstatus.active</name>
|
|
|
- <value>false</value>
|
|
|
- <description>Indicates if persistency of job status information is
|
|
|
- active or not.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.job.tracker.persist.jobstatus.hours</name>
|
|
|
- <value>1</value>
|
|
|
- <description>The number of hours job status information is persisted in DFS.
|
|
|
- The job status information will be available after it drops of the memory
|
|
|
- queue and between jobtracker restarts. With a zero value the job status
|
|
|
- information is not persisted at all in DFS.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.job.tracker.persist.jobstatus.dir</name>
|
|
|
- <value></value>
|
|
|
- <description>The directory where the job status information is persisted
|
|
|
- in a file system to be available after it drops of the memory queue and
|
|
|
- between jobtracker restarts.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.jobtracker.retirejob.check</name>
|
|
|
- <value>10000</value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.jobtracker.retirejob.interval</name>
|
|
|
- <value>0</value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.job.tracker.history.completed.location</name>
|
|
|
- <value>/mapred/history/done</value>
|
|
|
- <description>No description</description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.task.maxvmem</name>
|
|
|
- <value></value>
|
|
|
- <final>true</final>
|
|
|
- <description>No description</description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.jobtracker.maxtasks.per.job</name>
|
|
|
- <value></value>
|
|
|
- <final>true</final>
|
|
|
- <description>The maximum number of tasks for a single job.
|
|
|
- A value of -1 indicates that there is no maximum. </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapreduce.fileoutputcommitter.marksuccessfuljobs</name>
|
|
|
- <value>false</value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.userlog.retain.hours</name>
|
|
|
- <value></value>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.job.reuse.jvm.num.tasks</name>
|
|
|
- <value>1</value>
|
|
|
- <description>
|
|
|
- How many tasks to run per jvm. If set to -1, there is no limit
|
|
|
- </description>
|
|
|
- <final>true</final>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapreduce.jobtracker.kerberos.principal</name>
|
|
|
- <value></value>
|
|
|
- <description>
|
|
|
- JT user name key.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapreduce.tasktracker.kerberos.principal</name>
|
|
|
- <value></value>
|
|
|
- <description>
|
|
|
- tt user name key. "_HOST" is replaced by the host name of the task tracker.
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>hadoop.job.history.user.location</name>
|
|
|
- <value>none</value>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.jobtracker.keytab.file</name>
|
|
|
- <value></value>
|
|
|
- <description>
|
|
|
- The keytab for the jobtracker principal.
|
|
|
- </description>
|
|
|
-
|
|
|
-</property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.tasktracker.keytab.file</name>
|
|
|
- <value></value>
|
|
|
- <description>The filename of the keytab for the task tracker</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.jobtracker.staging.root.dir</name>
|
|
|
- <value>/user</value>
|
|
|
- <description>The Path prefix for where the staging directories should be placed. The next level is always the user's
|
|
|
- name. It is a path in the default file system.</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.tasktracker.group</name>
|
|
|
- <value>hadoop</value>
|
|
|
- <description>The group that the task controller uses for accessing the task controller. The mapred user must be a member and users should *not* be members.</description>
|
|
|
-
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.jobtracker.split.metainfo.maxsize</name>
|
|
|
- <value>50000000</value>
|
|
|
- <final>true</final>
|
|
|
- <description>If the size of the split metainfo file is larger than this, the JobTracker will fail the job during
|
|
|
- initialize.
|
|
|
- </description>
|
|
|
- </property>
|
|
|
- <property>
|
|
|
- <name>mapreduce.history.server.embedded</name>
|
|
|
- <value>false</value>
|
|
|
- <description>Should job history server be embedded within Job tracker
|
|
|
-process</description>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.history.server.http.address</name>
|
|
|
- <!-- cluster variant -->
|
|
|
- <value></value>
|
|
|
- <description>Http address of the history server</description>
|
|
|
- <final>true</final>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.jobhistory.kerberos.principal</name>
|
|
|
- <!-- cluster variant -->
|
|
|
- <value></value>
|
|
|
- <description>Job history user name key. (must map to same user as JT
|
|
|
-user)</description>
|
|
|
- </property>
|
|
|
-
|
|
|
- <property>
|
|
|
- <name>mapreduce.jobhistory.keytab.file</name>
|
|
|
- <!-- cluster variant -->
|
|
|
- <value></value>
|
|
|
- <description>The keytab for the job history server principal.</description>
|
|
|
- </property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.jobtracker.blacklist.fault-timeout-window</name>
|
|
|
- <value>180</value>
|
|
|
- <description>
|
|
|
- 3-hour sliding window (value is in minutes)
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.jobtracker.blacklist.fault-bucket-width</name>
|
|
|
- <value>15</value>
|
|
|
- <description>
|
|
|
- 15-minute bucket size (value is in minutes)
|
|
|
- </description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapred.queue.names</name>
|
|
|
- <value>default</value>
|
|
|
- <description> Comma separated list of queues configured for this jobtracker.</description>
|
|
|
-</property>
|
|
|
-
|
|
|
-<property>
|
|
|
- <name>mapreduce.shuffle.port</name>
|
|
|
- <value>8081</value>
|
|
|
- <description>Default port that the ShuffleHandler will run on. ShuffleHandler is a service run at the NodeManager to facilitate transfers of intermediate Map outputs to requesting Reducers.</description>
|
|
|
-</property>
|
|
|
-
|
|
|
-</configuration>
|