Pārlūkot izejas kodu

cp conf dir to mapred and hdfs dir's

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/HADOOP-4687/hdfs@780614 13f79535-47bb-0310-9956-ffa450edef68
Giridharan Kesavan 16 gadi atpakaļ
vecāks
revīzija
e8d6808213

+ 126 - 0
conf/capacity-scheduler.xml.template

@@ -0,0 +1,126 @@
+<?xml version="1.0"?>
+
+<!-- This is the configuration file for the resource manager in Hadoop. -->
+<!-- You can configure various scheduling parameters related to queues. -->
+<!-- The properties for a queue follow a naming convention,such as, -->
+<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
+
+<configuration>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.capacity</name>
+    <value>100</value>
+    <description>Percentage of the number of slots in the cluster that are
+      to be available for jobs in this queue.
+    </description>    
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into 
+      account in scheduling decisions.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
+    <value>100</value>
+    <description> Each queue enforces a limit on the percentage of resources 
+    allocated to a user at any given time, if there is competition for them. 
+    This user limit can vary between a minimum and maximum value. The former
+    depends on the number of users who have submitted jobs, and the latter is
+    set to this property value. For example, suppose the value of this 
+    property is 25. If two users have submitted jobs to a queue, no single 
+    user can use more than 50% of the queue resources. If a third user submits
+    a job, no single user can use more than 33% of the queue resources. With 4 
+    or more users, no user can use more than 25% of the queue's resources. A 
+    value of 100 implies no user limits are imposed. 
+    </description>
+  </property>
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
+    <value>2</value>
+    <description>The maximum number of jobs to be pre-initialized for a user
+    of the job queue.
+    </description>
+  </property>
+  
+  <!-- The default configuration settings for the capacity task scheduler -->
+  <!-- The default values would be applied to all the queues which don't have -->
+  <!-- the appropriate property for the particular queue -->
+  <property>
+    <name>mapred.capacity-scheduler.default-supports-priority</name>
+    <value>false</value>
+    <description>If true, priorities of jobs will be taken into 
+      account in scheduling decisions by default in a job queue.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</name>
+    <value>-1</value>
+    <description>A percentage (float) of the default VM limit for jobs
+   	  (mapred.task.default.maxvm). This is the default RAM task-limit 
+   	  associated with a task. Unless overridden by a job's setting, this 
+   	  number defines the RAM task-limit.
+
+      If this property is missing, or set to an invalid value, scheduling 
+      based on physical memory, RAM, is disabled.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.task.limit.maxpmem</name>
+    <value>-1</value>
+    <description>Configuration that provides an upper limit on the maximum
+      physical memory that can be specified by a job. The job configuration
+      mapred.task.maxpmem should be less than this value. If not, the job will
+      be rejected by the scheduler.
+      
+      If it is set to -1, scheduler will not consider physical memory for
+      scheduling even if virtual memory based scheduling is enabled(by setting
+      valid values for both mapred.task.default.maxvmem and
+      mapred.task.limit.maxvmem).
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
+    <value>100</value>
+    <description>The percentage of the resources limited to a particular user
+      for the job queue at any given point of time by default.
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user</name>
+    <value>2</value>
+    <description>The maximum number of jobs to be pre-initialized for a user
+    of the job queue.
+    </description>
+  </property>
+
+
+  <!-- Capacity scheduler Job Initialization configuration parameters -->
+  <property>
+    <name>mapred.capacity-scheduler.init-poll-interval</name>
+    <value>5000</value>
+    <description>The amount of time in miliseconds which is used to poll 
+    the job queues for jobs to initialize.
+    </description>
+  </property>
+  <property>
+    <name>mapred.capacity-scheduler.init-worker-threads</name>
+    <value>5</value>
+    <description>Number of worker threads which would be used by
+    Initialization poller to initialize jobs in a set of queue.
+    If number mentioned in property is equal to number of job queues
+    then a single thread would initialize jobs in a queue. If lesser
+    then a thread would get a set of queues assigned. If the number
+    is greater then number of threads would be equal to number of 
+    job queues.
+    </description>
+  </property>
+
+</configuration>

+ 24 - 0
conf/configuration.xsl

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>

+ 8 - 0
conf/core-site.xml.template

@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

+ 54 - 0
conf/hadoop-env.sh.template

@@ -0,0 +1,54 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HADOOP_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# export HADOOP_OPTS=-server
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+# export HADOOP_TASKTRACKER_OPTS=
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options.  Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+# File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from.  Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HADOOP_PID_DIR=/var/hadoop/pids
+
+# A string representing this instance of hadoop. $USER by default.
+# export HADOOP_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HADOOP_NICENESS=10

+ 53 - 0
conf/hadoop-metrics.properties

@@ -0,0 +1,53 @@
+# Configuration of the "dfs" context for null
+dfs.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "dfs" context for file
+#dfs.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.period=10
+#dfs.fileName=/tmp/dfsmetrics.log
+
+# Configuration of the "dfs" context for ganglia
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# dfs.period=10
+# dfs.servers=localhost:8649
+
+
+# Configuration of the "mapred" context for null
+mapred.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "mapred" context for file
+#mapred.class=org.apache.hadoop.metrics.file.FileContext
+#mapred.period=10
+#mapred.fileName=/tmp/mrmetrics.log
+
+# Configuration of the "mapred" context for ganglia
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# mapred.period=10
+# mapred.servers=localhost:8649
+
+
+# Configuration of the "jvm" context for null
+jvm.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "jvm" context for file
+#jvm.class=org.apache.hadoop.metrics.file.FileContext
+#jvm.period=10
+#jvm.fileName=/tmp/jvmmetrics.log
+
+# Configuration of the "jvm" context for ganglia
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# jvm.period=10
+# jvm.servers=localhost:8649
+
+# Configuration of the "rpc" context for null
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "rpc" context for file
+#rpc.class=org.apache.hadoop.metrics.file.FileContext
+#rpc.period=10
+#rpc.fileName=/tmp/rpcmetrics.log
+
+# Configuration of the "rpc" context for ganglia
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rpc.period=10
+# rpc.servers=localhost:8649

+ 97 - 0
conf/hadoop-policy.xml.template

@@ -0,0 +1,97 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code 
+    via the DistributedFileSystem. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol 
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to 
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.tracker.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterTrackerProtocol, used by the tasktrackers to 
+    communicate with the jobtracker.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.submission.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for JobSubmissionProtocol, used by job clients to 
+    communciate with the jobtracker for job submission, querying job status etc.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.task.umbilical.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce 
+    tasks to communicate with the parent tasktracker. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the 
+    dfsadmin and mradmin commands to refresh the security policy in-effect. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+</configuration>

+ 8 - 0
conf/hdfs-site.xml.template

@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

+ 94 - 0
conf/log4j.properties

@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollver at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter

+ 31 - 0
conf/mapred-queue-acls.xml.template

@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is a template file for queue acls configuration properties -->
+
+<configuration>
+
+<property>
+  <name>mapred.queue.default.acl-submit-job</name>
+  <value>*</value>
+  <description> Comma separated list of user and group names that are allowed
+    to submit jobs to the 'default' queue. The user list and the group list
+    are separated by a blank. For e.g. alice,bob group1,group2. 
+    If set to the special value '*', it means all users are allowed to 
+    submit jobs. 
+  </description>
+</property>
+
+<property>
+  <name>mapred.queue.default.acl-administer-jobs</name>
+  <value>*</value>
+  <description> Comma separated list of user and group names that are allowed
+    to delete jobs or modify job's priority for jobs not owned by the current
+    user in the 'default' queue. The user list and the group list
+    are separated by a blank. For e.g. alice,bob group1,group2. 
+    If set to the special value '*', it means all users are allowed to do 
+    this operation.
+  </description>
+</property>
+
+</configuration>

+ 8 - 0
conf/mapred-site.xml.template

@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

+ 1 - 0
conf/masters.template

@@ -0,0 +1 @@
+localhost

+ 1 - 0
conf/slaves.template

@@ -0,0 +1 @@
+localhost

+ 57 - 0
conf/ssl-client.xml.example

@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

+ 55 - 0
conf/ssl-server.xml.example

@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.server.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.password</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.keypassword</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

+ 3 - 0
conf/taskcontroller.cfg

@@ -0,0 +1,3 @@
+mapred.local.dir=#configured value of hadoop.tmp.dir it can be a list of paths comma seperated
+hadoop.pid.dir=#configured HADOOP_PID_DIR
+hadoop.indent.str=#configured HADOOP_IDENT_STR