<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Do not modify this file directly. Instead, copy entries that you -->
<!-- wish to modify from this file into hadoop-site.xml and change them -->
<!-- there. If hadoop-site.xml does not already exist, create it. -->
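
<!-- Illustrative sketch (not part of this file): a minimal hadoop-site.xml
     that overrides a single entry, per the note above. The chosen property
     and value are examples only; any property below can be overridden the
     same way. -->
<!--
<?xml version="1.0"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>
</configuration>
-->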

<configuration>

<!-- i/o properties -->

<property>
  <name>io.sort.factor</name>
  <value>10</value>
  <description>The number of streams to merge at once while sorting
  files. This determines the number of open file handles.</description>
</property>

<property>
  <name>io.sort.mb</name>
  <value>100</value>
  <description>The total amount of buffer memory to use while sorting
  files, in megabytes. By default, gives each merge stream 1MB, which
  should minimize seeks.</description>
</property>

<property>
  <name>io.file.buffer.size</name>
  <value>4096</value>
  <description>The size of buffer for use in sequence files.
  The size of this buffer should probably be a multiple of hardware
  page size (4096 on Intel x86), and it determines how much data is
  buffered during read and write operations.</description>
</property>

<property>
  <name>io.bytes.per.checksum</name>
  <value>512</value>
  <description>The number of bytes per checksum. Must not be larger than
  io.file.buffer.size.</description>
</property>

<property>
  <name>io.skip.checksum.errors</name>
  <value>false</value>
  <description>If true, when a checksum error is encountered while
  reading a sequence file, entries are skipped, instead of throwing an
  exception.</description>
</property>

<property>
  <name>io.map.index.skip</name>
  <value>0</value>
  <description>Number of index entries to skip between each entry.
  Zero by default. Setting this to values larger than zero can
  facilitate opening large map files using less memory.</description>
</property>

<!-- file system properties -->

<property>
  <name>fs.default.name</name>
  <value>local</value>
  <description>The name of the default file system. Either the
  literal string "local" or a host:port for DFS.</description>
</property>
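
<!-- Example (host and port are placeholders): pointing the default file
     system at a DFS name node instead of the local file system. -->
<!--
<property>
  <name>fs.default.name</name>
  <value>namenode.example.com:9000</value>
</property>
-->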

<property>
  <name>dfs.datanode.port</name>
  <value>50010</value>
  <description>The port number that the dfs datanode server uses as a starting
  point to look for a free port to listen on.
  </description>
</property>

<property>
  <name>dfs.name.dir</name>
  <value>/tmp/hadoop/dfs/name</value>
  <description>Determines where on the local filesystem the DFS name node
  should store the name table.</description>
</property>

<property>
  <name>dfs.data.dir</name>
  <value>/tmp/hadoop/dfs/data</value>
  <description>Determines where on the local filesystem a DFS data node
  should store its blocks. If this is a comma- or space-delimited
  list of directories, then data will be stored in all named
  directories, typically on different devices.</description>
</property>
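
<!-- Example (paths are hypothetical): spreading block storage across two
     drives with a comma-delimited list, as described above. -->
<!--
<property>
  <name>dfs.data.dir</name>
  <value>/mnt/disk1/dfs/data,/mnt/disk2/dfs/data</value>
</property>
-->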

<property>
  <name>dfs.replication</name>
  <value>3</value>
  <description>The number of copies of each block that we try to keep at
  all times. The actual number of replicas is at most the number of
  datanodes in the cluster.</description>
</property>

<property>
  <name>dfs.df.interval</name>
  <value>3000</value>
  <description>Disk usage statistics refresh interval in msec.</description>
</property>

<!-- map/reduce properties -->

<property>
  <name>mapred.job.tracker</name>
  <value>local</value>
  <description>The host and port that the MapReduce job tracker runs
  at. If "local", then jobs are run in-process as a single map
  and reduce task.
  </description>
</property>
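
<!-- Example (host and port are placeholders): running jobs on a cluster
     instead of in-process by naming a job tracker. -->
<!--
<property>
  <name>mapred.job.tracker</name>
  <value>jobtracker.example.com:9001</value>
</property>
-->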

<property>
  <name>mapred.job.tracker.info.port</name>
  <value>50030</value>
  <description>The port that the MapReduce job tracker info webserver runs at.
  </description>
</property>

<property>
  <name>mapred.task.tracker.output.port</name>
  <value>50040</value>
  <description>The port number that the MapReduce task tracker output server uses as a starting
  point to look for a free port to listen on.
  </description>
</property>

<property>
  <name>mapred.task.tracker.report.port</name>
  <value>50050</value>
  <description>The port number that the MapReduce task tracker report server uses as a starting
  point to look for a free port to listen on.
  </description>
</property>

<property>
  <name>mapred.local.dir</name>
  <value>/tmp/hadoop/mapred/local</value>
  <description>The local directory where MapReduce stores intermediate
  data files. May be a space- or comma-separated list of
  directories on different devices in order to spread disk i/o.
  </description>
</property>

<property>
  <name>mapred.system.dir</name>
  <value>/tmp/hadoop/mapred/system</value>
  <description>The shared directory where MapReduce stores control files.
  </description>
</property>

<property>
  <name>mapred.temp.dir</name>
  <value>/tmp/hadoop/mapred/temp</value>
  <description>A shared directory for temporary files.
  </description>
</property>

<property>
  <name>mapred.map.tasks</name>
  <value>2</value>
  <description>The default number of map tasks per job. Typically set
  to a prime several times greater than the number of available hosts.
  Ignored when mapred.job.tracker is "local".
  </description>
</property>

<property>
  <name>mapred.reduce.tasks</name>
  <value>1</value>
  <description>The default number of reduce tasks per job. Typically set
  to a prime close to the number of available hosts. Ignored when
  mapred.job.tracker is "local".
  </description>
</property>
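
<!-- Worked example (cluster size is hypothetical): for roughly 20 task
     tracker hosts, the guidance above suggests a prime several times the
     host count for maps and a prime close to it for reduces, e.g.: -->
<!--
<property>
  <name>mapred.map.tasks</name>
  <value>61</value>
</property>
<property>
  <name>mapred.reduce.tasks</name>
  <value>19</value>
</property>
-->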

<property>
  <name>mapred.task.timeout</name>
  <value>600000</value>
  <description>The number of milliseconds before a task will be
  terminated if it neither reads an input, writes an output, nor
  updates its status string.
  </description>
</property>

<property>
  <name>mapred.tasktracker.tasks.maximum</name>
  <value>2</value>
  <description>The maximum number of tasks that will be run
  simultaneously by a task tracker.
  </description>
</property>

<property>
  <name>mapred.child.java.opts</name>
  <value>-Xmx200m</value>
  <description>Java opts for the task tracker child processes. Subsumes
  'mapred.child.heap.size' (if a mapred.child.heap.size value is found
  in a configuration, its maximum heap size will be used and a warning
  emitted that heap.size has been deprecated). Also, the following symbols,
  if present, will be interpolated: @taskid@ is replaced by the current
  TaskID, and @port@ is replaced by mapred.task.tracker.report.port + 1 (a
  second child will fail with a port-in-use error if
  mapred.tasktracker.tasks.maximum is greater than one). Any other
  occurrences of '@' are left unchanged. For example, to enable verbose gc
  logging to a file named for the taskid in /tmp and to set the heap
  maximum to a gigabyte, pass a 'value' of:
  -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
  </description>
</property>
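
<!-- The description's own example written out as a full override: a 1GB
     heap with verbose gc logging to a per-task file (@taskid@ is
     interpolated as documented above). -->
<!--
<property>
  <name>mapred.child.java.opts</name>
  <value>-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc</value>
</property>
-->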

<property>
  <name>mapred.combine.buffer.size</name>
  <value>100000</value>
  <description>The number of entries the combining collector caches before
  combining them and writing to disk.</description>
</property>

<property>
  <name>mapred.speculative.execution</name>
  <value>true</value>
  <description>If true, then multiple instances of some map tasks may
  be executed in parallel.</description>
</property>

<property>
  <name>mapred.min.split.size</name>
  <value>0</value>
  <description>The minimum size chunk that map input should be split
  into. Note that some file formats may have minimum split sizes that
  take priority over this setting.</description>
</property>

<!-- ipc properties -->

<property>
  <name>ipc.client.timeout</name>
  <value>60000</value>
  <description>Defines the timeout for IPC calls in milliseconds.</description>
</property>

</configuration>