
HADOOP-4164. Chinese translation of the documentation. Merged from 0.19 to 0.18
(Xuebing Yan via omalley)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.18@725508 13f79535-47bb-0310-9956-ffa450edef68

Owen O'Malley, 16 years ago
parent
commit
ebf6b3fc35
100 changed files with 19,078 additions and 2 deletions
  1. CHANGES.txt (+5 -0)
  2. build.xml (+22 -0)
  3. docs/changes.html (+24 -2)
  4. docs/cn/broken-links.xml (+2 -0)
  5. docs/cn/cluster_setup.html (+730 -0)
  6. docs/cn/cluster_setup.pdf (+209 -0)
  7. docs/cn/commands_manual.html (+1116 -0)
  8. docs/cn/commands_manual.pdf (+261 -0)
  9. docs/cn/distcp.html (+563 -0)
  10. docs/cn/distcp.pdf (+149 -0)
  11. docs/cn/hadoop-default.html (+1108 -0)
  12. docs/cn/hadoop_archives.html (+302 -0)
  13. docs/cn/hadoop_archives.pdf (+137 -0)
  14. docs/cn/hdfs_design.html (+664 -0)
  15. docs/cn/hdfs_design.pdf (+424 -0)
  16. docs/cn/hdfs_permissions_guide.html (+504 -0)
  17. docs/cn/hdfs_permissions_guide.pdf (+129 -0)
  18. docs/cn/hdfs_quota_admin_guide.html (+277 -0)
  19. docs/cn/hdfs_quota_admin_guide.pdf (+47 -0)
  20. docs/cn/hdfs_shell.html (+860 -0)
  21. docs/cn/hdfs_shell.pdf (+347 -0)
  22. docs/cn/hdfs_user_guide.html (+718 -0)
  23. docs/cn/hdfs_user_guide.pdf (+195 -0)
  24. docs/cn/hod.html (+257 -0)
  25. docs/cn/hod.pdf (+144 -0)
  26. docs/cn/hod_admin_guide.html (+557 -0)
  27. docs/cn/hod_admin_guide.pdf (+162 -0)
  28. docs/cn/hod_config_guide.html (+422 -0)
  29. docs/cn/hod_config_guide.pdf (+140 -0)
  30. docs/cn/hod_user_guide.html (+1251 -0)
  31. docs/cn/hod_user_guide.pdf (+358 -0)
  32. docs/cn/images/built-with-forrest-button.png (BIN)
  33. docs/cn/images/core-logo.gif (BIN)
  34. docs/cn/images/favicon.ico (BIN)
  35. docs/cn/images/hadoop-logo.jpg (BIN)
  36. docs/cn/images/hdfsarchitecture.gif (BIN)
  37. docs/cn/images/hdfsdatanodes.gif (BIN)
  38. docs/cn/images/instruction_arrow.png (BIN)
  39. docs/cn/index.html (+268 -0)
  40. docs/cn/index.pdf (+160 -0)
  41. docs/cn/linkmap.html (+380 -0)
  42. docs/cn/linkmap.pdf (+62 -0)
  43. docs/cn/mapred_tutorial.html (+3464 -0)
  44. docs/cn/mapred_tutorial.pdf (+250 -0)
  45. docs/cn/native_libraries.html (+462 -0)
  46. docs/cn/native_libraries.pdf (+107 -0)
  47. docs/cn/quickstart.html (+574 -0)
  48. docs/cn/quickstart.pdf (+173 -0)
  49. docs/cn/skin/CommonMessages_de.xml (+23 -0)
  50. docs/cn/skin/CommonMessages_en_US.xml (+23 -0)
  51. docs/cn/skin/CommonMessages_es.xml (+23 -0)
  52. docs/cn/skin/CommonMessages_fr.xml (+23 -0)
  53. docs/cn/skin/basic.css (+166 -0)
  54. docs/cn/skin/breadcrumbs-optimized.js (+90 -0)
  55. docs/cn/skin/breadcrumbs.js (+237 -0)
  56. docs/cn/skin/fontsize.js (+166 -0)
  57. docs/cn/skin/getBlank.js (+40 -0)
  58. docs/cn/skin/getMenu.js (+45 -0)
  59. docs/cn/skin/images/README.txt (+1 -0)
  60. docs/cn/skin/images/add.jpg (BIN)
  61. docs/cn/skin/images/built-with-forrest-button.png (BIN)
  62. docs/cn/skin/images/chapter.gif (BIN)
  63. docs/cn/skin/images/chapter_open.gif (BIN)
  64. docs/cn/skin/images/corner-imports.svg.xslt (+92 -0)
  65. docs/cn/skin/images/current.gif (BIN)
  66. docs/cn/skin/images/dc.svg.xslt (+28 -0)
  67. docs/cn/skin/images/error.png (BIN)
  68. docs/cn/skin/images/external-link.gif (BIN)
  69. docs/cn/skin/images/fix.jpg (BIN)
  70. docs/cn/skin/images/forrest-credit-logo.png (BIN)
  71. docs/cn/skin/images/hack.jpg (BIN)
  72. docs/cn/skin/images/header_white_line.gif (BIN)
  73. docs/cn/skin/images/info.png (BIN)
  74. docs/cn/skin/images/instruction_arrow.png (BIN)
  75. docs/cn/skin/images/label.gif (BIN)
  76. docs/cn/skin/images/page.gif (BIN)
  77. docs/cn/skin/images/pdfdoc.gif (BIN)
  78. docs/cn/skin/images/poddoc.png (BIN)
  79. docs/cn/skin/images/poddoc.svg.xslt (+55 -0)
  80. docs/cn/skin/images/printer.gif (BIN)
  81. docs/cn/skin/images/rc-b-l-15-1body-2menu-3menu.png (BIN)
  82. docs/cn/skin/images/rc-b-r-15-1body-2menu-3menu.png (BIN)
  83. docs/cn/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png (BIN)
  84. docs/cn/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png (BIN)
  85. docs/cn/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png (BIN)
  86. docs/cn/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png (BIN)
  87. docs/cn/skin/images/rc-t-r-15-1body-2menu-3menu.png (BIN)
  88. docs/cn/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png (BIN)
  89. docs/cn/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png (BIN)
  90. docs/cn/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png (BIN)
  91. docs/cn/skin/images/rc.svg.xslt (+27 -0)
  92. docs/cn/skin/images/remove.jpg (BIN)
  93. docs/cn/skin/images/rss.png (BIN)
  94. docs/cn/skin/images/spacer.gif (BIN)
  95. docs/cn/skin/images/success.png (BIN)
  96. docs/cn/skin/images/txtdoc.png (BIN)
  97. docs/cn/skin/images/txtdoc.svg.xslt (+55 -0)
  98. docs/cn/skin/images/update.jpg (BIN)
  99. docs/cn/skin/images/valid-html401.png (BIN)
  100. docs/cn/skin/images/vcss.png (BIN)

+ 5 - 0
CHANGES.txt

@@ -971,6 +971,11 @@ Release 0.18.0 - 2008-08-19
 
 Release 0.17.3 - Unreleased
 
+  IMPROVEMENTS
+
+    HADOOP-4164. Chinese translation of the documentation. (Xuebing Yan via 
+    omalley)
+
   BUG FIXES
 
     HADOOP-4277. Checksum verification was mistakenly disabled for

+ 22 - 0
build.xml

@@ -41,8 +41,10 @@
   <property name="lib.dir" value="${basedir}/lib"/>
   <property name="conf.dir" value="${basedir}/conf"/>
   <property name="docs.dir" value="${basedir}/docs"/>
+  <property name="cndocs.dir" value="${basedir}/docs/cn"/>
   <property name="contrib.dir" value="${basedir}/src/contrib"/>
   <property name="docs.src" value="${basedir}/src/docs"/>
+  <property name="cndocs.src" value="${basedir}/src/docs/cn"/>
   <property name="changes.src" value="${docs.src}/changes"/>
   <property name="c++.src" value="${basedir}/src/c++"/>
   <property name="c++.utils.src" value="${c++.src}/utils"/>
@@ -244,6 +246,10 @@
     <exec executable="sh">
        <arg line="src/saveVersion.sh ${version}"/>
     </exec>
+	
+   <exec executable="sh">
+       <arg line="src/fixFontsPath.sh ${cndocs.src}"/>
+   </exec>
   </target>
 
   <!-- ====================================================== -->
@@ -761,6 +767,21 @@
     <style basedir="${conf.dir}" destdir="${docs.dir}"
            includes="hadoop-default.xml" style="conf/configuration.xsl"/>
     <antcall target="changes-to-html"/>
+    <antcall target="cn-docs"/>
+  </target>
+
+  <target name="cn-docs" depends="forrest.check, init" 
+       description="Generate forrest-based Chinese documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." 
+        if="forrest.home">
+    <exec dir="${cndocs.src}" executable="${forrest.home}/bin/forrest" failonerror="true">
+      <env key="LANG" value="en_US.utf8"/>
+    </exec>
+    <copy todir="${cndocs.dir}">
+      <fileset dir="${cndocs.src}/build/site/" />
+    </copy>
+    <style basedir="${conf.dir}" destdir="${cndocs.dir}"
+          includes="hadoop-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
   </target>
 
   <target name="forrest.check" unless="forrest.home">
@@ -1004,6 +1025,7 @@
   <target name="clean" depends="clean-contrib" description="Clean.  Delete the build files, and their directories">
     <delete dir="${build.dir}"/>
     <delete dir="${docs.src}/build"/>
+    <delete dir="${cndocs.src}/build"/>
   </target>
 
   <!-- ================================================================== -->
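
The build.xml changes mirror the existing docs pipeline for the new Chinese sources: the cn-docs target runs Apache Forrest over src/docs/cn with a UTF-8 locale, copies the generated site into docs/cn, and regenerates hadoop-default.html, while the docs and clean targets pick up the new paths. As a minimal usage sketch (the Forrest installation path below is a placeholder):

    # Build only the Chinese docs; the target is a no-op unless forrest.home is set.
    $ ant cn-docs -Dforrest.home=/opt/apache-forrest-0.8

    # Or build all documentation; "docs" now invokes "cn-docs" via antcall.
    $ ant docs -Dforrest.home=/opt/apache-forrest-0.8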

+ 24 - 2
docs/changes.html

@@ -63,7 +63,7 @@ via acmurthy)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('release_0.18.3_-_unreleased_._bug_fixes_')">  BUG FIXES
-</a>&nbsp;&nbsp;&nbsp;(19)
+</a>&nbsp;&nbsp;&nbsp;(29)
     <ol id="release_0.18.3_-_unreleased_._bug_fixes_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4499">HADOOP-4499</a>. DFSClient should invoke checksumOk only once.<br />(Raghu Angadi)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4597">HADOOP-4597</a>. Calculate mis-replicated blocks when safe-mode is turned
@@ -81,7 +81,7 @@ live replicas for a block.<br />(hairong)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4647">HADOOP-4647</a>. NamenodeFsck should close the DFSClient it has created.<br />(szetszwo)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4616">HADOOP-4616</a>. Fuse-dfs can handle bad values from FileSystem.read call.<br />(Pete Wyckoff via dhruba)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4061">HADOOP-4061</a>. Throttle Datanode decommission monitoring in Namenode.<br />(szetszwo)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4659">HADOOP-4659</a>. Root cause of connection failure is being ost to code that
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4659">HADOOP-4659</a>. Root cause of connection failure is being lost to code that
 uses it for delaying startup.<br />(Steve Loughran and Hairong via hairong)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4614">HADOOP-4614</a>. Lazily open segments when merging map spills to avoid using
 too many file descriptors.<br />(Yuri Pradkin via cdouglas)</li>
@@ -96,6 +96,21 @@ Kunz via cdouglas)</li>
 between progress reports configurable.<br />(Jothi Padmanabhan via cdouglas)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4726">HADOOP-4726</a>. Fix documentation typos "the the".<br />(Edward J. Yoon via
 szetszwo)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4679">HADOOP-4679</a>. Datanode prints tons of log messages: waiting for threadgroup
+to exit, active threads is XX.<br />(hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4734">HADOOP-4734</a>. Block and meta data validation codes in <a href="http://issues.apache.org/jira/browse/HADOOP-1700">HADOOP-1700</a> should be
+committed to 0.18.<br />(szetszwo)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4746">HADOOP-4746</a>. Job output directory should be normalized.<br />(hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4654">HADOOP-4654</a>. Removes temporary output directory for failed and killed
+tasks in the JobTracker's task commit thread.<br />(Amareshwari Sriramadasu via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4717">HADOOP-4717</a>. Removal of default port# in NameNode.getUri() causes a
+map/reduce job failed to prompt temporary output.<br />(hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4778">HADOOP-4778</a>. Check for zero size block meta file when updating a block.<br />(szetszwo)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4742">HADOOP-4742</a>. Replica gets deleted by mistake.<br />(Wang Xu via hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4702">HADOOP-4702</a>. Failed block replication leaves an incomplete block in
+receiver's tmp data directory.<br />(hairong)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4613">HADOOP-4613</a>. Fix block browsing on Web UI.<br />(Johan Oskarsson via shv)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4806">HADOOP-4806</a>. HDFS rename should not use src path as a regular expression.<br />(szetszwo)</li>
     </ol>
   </li>
 </ul>
@@ -716,6 +731,13 @@ cdouglas)</li>
 <h3><a href="javascript:toggleList('release_0.17.3_-_unreleased_')">Release 0.17.3 - Unreleased
 </a></h3>
 <ul id="release_0.17.3_-_unreleased_">
+  <li><a href="javascript:toggleList('release_0.17.3_-_unreleased_._improvements_')">  IMPROVEMENTS
+</a>&nbsp;&nbsp;&nbsp;(1)
+    <ol id="release_0.17.3_-_unreleased_._improvements_">
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4164">HADOOP-4164</a>. Chinese translation of the documentation.<br />(Xuebing Yan via
+omalley)</li>
+    </ol>
+  </li>
   <li><a href="javascript:toggleList('release_0.17.3_-_unreleased_._bug_fixes_')">  BUG FIXES
 </a>&nbsp;&nbsp;&nbsp;(4)
     <ol id="release_0.17.3_-_unreleased_._bug_fixes_">

+ 2 - 0
docs/cn/broken-links.xml

@@ -0,0 +1,2 @@
+<broken-links>
+</broken-links>

+ 730 - 0
docs/cn/cluster_setup.html

@@ -0,0 +1,730 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop集群搭建</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">集群搭建</div>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="cluster_setup.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop集群搭建</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%9B%AE%E7%9A%84">目的</a>
+</li>
+<li>
+<a href="#%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6">先决条件</a>
+</li>
+<li>
+<a href="#%E5%AE%89%E8%A3%85">安装</a>
+</li>
+<li>
+<a href="#%E9%85%8D%E7%BD%AE">配置</a>
+<ul class="minitoc">
+<li>
+<a href="#%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6">配置文件</a>
+</li>
+<li>
+<a href="#%E9%9B%86%E7%BE%A4%E9%85%8D%E7%BD%AE">集群配置</a>
+<ul class="minitoc">
+<li>
+<a href="#%E9%85%8D%E7%BD%AEHadoop%E5%AE%88%E6%8A%A4%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%90%E8%A1%8C%E7%8E%AF%E5%A2%83">配置Hadoop守护进程的运行环境</a>
+</li>
+<li>
+<a href="#%E9%85%8D%E7%BD%AEHadoop%E5%AE%88%E6%8A%A4%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%90%E8%A1%8C%E5%8F%82%E6%95%B0">配置Hadoop守护进程的运行参数</a>
+</li>
+<li>
+<a href="#Slaves">Slaves</a>
+</li>
+<li>
+<a href="#%E6%97%A5%E5%BF%97">日志</a>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Hadoop%E7%9A%84%E6%9C%BA%E6%9E%B6%E6%84%9F%E7%9F%A5">Hadoop的机架感知</a>
+</li>
+<li>
+<a href="#%E5%90%AF%E5%8A%A8Hadoop">启动Hadoop</a>
+</li>
+<li>
+<a href="#%E5%81%9C%E6%AD%A2Hadoop">停止Hadoop</a>
+</li>
+</ul>
+</div>
+  
+    
+<a name="N1000D"></a><a name="%E7%9B%AE%E7%9A%84"></a>
+<h2 class="h3">目的</h2>
+<div class="section">
+<p>本文描述了如何安装、配置和管理有实际意义的Hadoop集群,其规模可从几个节点的小集群到几千个节点的超大集群。</p>
+<p>如果你希望在单机上安装Hadoop玩玩,从<a href="quickstart.html">这里</a>能找到相关细节。</p>
+</div>
+    
+    
+<a name="N1001E"></a><a name="%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6"></a>
+<h2 class="h3">先决条件</h2>
+<div class="section">
+<ol>
+        
+<li>
+          确保在你集群中的每个节点上都安装了所有<a href="quickstart.html#PreReqs">必需</a>软件。
+        </li>
+        
+<li>
+          
+<a href="quickstart.html#%E4%B8%8B%E8%BD%BD">获取</a>Hadoop软件包。
+        </li>
+      
+</ol>
+</div>
+    
+    
+<a name="N10036"></a><a name="%E5%AE%89%E8%A3%85"></a>
+<h2 class="h3">安装</h2>
+<div class="section">
+<p>安装Hadoop集群通常要将安装软件解压到集群内的所有机器上。</p>
+<p>通常,集群里的一台机器被指定为 
+	 <span class="codefrag">NameNode</span>,另一台不同的机器被指定为<span class="codefrag">JobTracker</span>。这些机器是<em>masters</em>。余下的机器即作为<span class="codefrag">DataNode</span><em>也</em>作为<span class="codefrag">TaskTracker</span>。这些机器是<em>slaves</em>。</p>
+<p>我们用<span class="codefrag">HADOOP_HOME</span>指代安装的根路径。通常,集群里的所有机器的<span class="codefrag">HADOOP_HOME</span>路径相同。</p>
+</div>
+    
+    
+<a name="N10060"></a><a name="%E9%85%8D%E7%BD%AE"></a>
+<h2 class="h3">配置</h2>
+<div class="section">
+<p>接下来的几节描述了如何配置Hadoop集群。</p>
+<a name="N10069"></a><a name="%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6"></a>
+<h3 class="h4">配置文件</h3>
+<p>对Hadoop的配置通过<span class="codefrag">conf/</span>目录下的两个重要配置文件完成:</p>
+<ol>
+          
+<li>
+            
+<a href="http://hadoop.apache.org/core/docs/current/hadoop-default.html">hadoop-default.xml</a> - 只读的默认配置。
+          </li>
+          
+<li>
+            
+<em>hadoop-site.xml</em> - 集群特有的配置。
+          </li>
+        
+</ol>
+<p>要了解更多关于这些配置文件如何影响Hadoop框架的细节,请看<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html">这里</a>。</p>
+<p>此外,通过设置<span class="codefrag">conf/hadoop-env.sh</span>中的变量为集群特有的值,你可以对<span class="codefrag">bin/</span>目录下的Hadoop脚本进行控制。</p>
+<a name="N10096"></a><a name="%E9%9B%86%E7%BE%A4%E9%85%8D%E7%BD%AE"></a>
+<h3 class="h4">集群配置</h3>
+<p>要配置Hadoop集群,你需要设置Hadoop守护进程的<em>运行环境</em>和Hadoop守护进程的<em>运行参数</em>。</p>
+<p>Hadoop守护进程指<span class="codefrag">NameNode</span>/<span class="codefrag">DataNode</span> 
+        和<span class="codefrag">JobTracker</span>/<span class="codefrag">TaskTracker</span>。</p>
+<a name="N100B4"></a><a name="%E9%85%8D%E7%BD%AEHadoop%E5%AE%88%E6%8A%A4%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%90%E8%A1%8C%E7%8E%AF%E5%A2%83"></a>
+<h4>配置Hadoop守护进程的运行环境</h4>
+<p>管理员可在<span class="codefrag">conf/hadoop-env.sh</span>脚本内对Hadoop守护进程的运行环境做特别指定。</p>
+<p>至少,你得设定<span class="codefrag">JAVA_HOME</span>使之在每一远端节点上都被正确设置。</p>
+<p>管理员可以通过配置选项<span class="codefrag">HADOOP_*_OPTS</span>来分别配置各个守护进程。
+          下表是可以配置的选项。
+          </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+<th colspan="1" rowspan="1">守护进程</th><th colspan="1" rowspan="1">配置选项</th>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">NameNode</td><td colspan="1" rowspan="1">HADOOP_NAMENODE_OPTS</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">DataNode</td><td colspan="1" rowspan="1">HADOOP_DATANODE_OPTS</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">SecondaryNamenode</td>
+              <td colspan="1" rowspan="1">HADOOP_SECONDARYNAMENODE_OPTS</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">JobTracker</td><td colspan="1" rowspan="1">HADOOP_JOBTRACKER_OPTS</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">TaskTracker</td><td colspan="1" rowspan="1">HADOOP_TASKTRACKER_OPTS</td>
+</tr>
+          
+</table>
+<p>例如,配置Namenode时,为了使其能够并行回收垃圾(parallelGC),
+          要把下面的代码加入到<span class="codefrag">hadoop-env.sh</span> :
+          <br>
+<span class="codefrag">
+          export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
+          </span>
+<br>
+</p>
+<p>其它可定制的常用参数还包括:</p>
+<ul>
+            
+<li>
+              
+<span class="codefrag">HADOOP_LOG_DIR</span> - 守护进程日志文件的存放目录。如果不存在会被自动创建。
+            </li>
+            
+<li>
+              
+<span class="codefrag">HADOOP_HEAPSIZE</span> - 最大可用的堆大小,单位为MB。比如,<span class="codefrag">1000MB</span>。
+              这个参数用于设置hadoop守护进程的堆大小。缺省大小是<span class="codefrag">1000MB</span>。
+            </li>
+          
+</ul>
+<a name="N1012F"></a><a name="%E9%85%8D%E7%BD%AEHadoop%E5%AE%88%E6%8A%A4%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%90%E8%A1%8C%E5%8F%82%E6%95%B0"></a>
+<h4>配置Hadoop守护进程的运行参数</h4>
+<p>这部分涉及Hadoop集群的重要参数,这些参数在<span class="codefrag">conf/hadoop-site.xml</span>中指定。</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+  		    
+<tr>
+		      
+<th colspan="1" rowspan="1">参数</th>
+		      <th colspan="1" rowspan="1">取值</th> 
+		      <th colspan="1" rowspan="1">备注</th>
+		    
+</tr>
+  		    
+<tr>
+		      
+<td colspan="1" rowspan="1">fs.default.name</td>
+                       <td colspan="1" rowspan="1"><span class="codefrag">NameNode</span>的URI。</td>
+                       <td colspan="1" rowspan="1"><em>hdfs://主机名/</em></td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">mapred.job.tracker</td>
+		      <td colspan="1" rowspan="1"><span class="codefrag">JobTracker</span>的主机(或者IP)和端口。</td>
+		      <td colspan="1" rowspan="1"><em>主机:端口</em>。</td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">dfs.name.dir</td>
+		      <td colspan="1" rowspan="1">
+		        <span class="codefrag">NameNode</span>持久存储名字空间及事务日志的本地文件系统路径。</td>
+		      <td colspan="1" rowspan="1">当这个值是一个逗号分割的目录列表时,nametable数据将会被复制到所有目录中做冗余备份。
+		      </td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">dfs.data.dir</td>
+		      <td colspan="1" rowspan="1"> 
+		        <span class="codefrag">DataNode</span>存放块数据的本地文件系统路径,逗号分割的列表。
+		      </td>
+		      <td colspan="1" rowspan="1">
+		        当这个值是逗号分割的目录列表时,数据将被存储在所有目录下,通常分布在不同设备上。
+		      </td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">mapred.system.dir</td>
+		      <td colspan="1" rowspan="1">Map/Reduce框架存储系统文件的HDFS路径。比如<span class="codefrag">/hadoop/mapred/system/</span>。
+		      </td>
+		      <td colspan="1" rowspan="1">这个路径是默认文件系统(HDFS)下的路径, 须从服务器和客户端上均可访问。
+		      </td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">mapred.local.dir</td>
+		      <td colspan="1" rowspan="1">本地文件系统下逗号分割的路径列表,Map/Reduce临时数据存放的地方。
+		      </td>
+		      <td colspan="1" rowspan="1">多路径有助于利用磁盘i/o。</td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">mapred.tasktracker.{map|reduce}.tasks.maximum</td>
+		      <td colspan="1" rowspan="1">某一<span class="codefrag">TaskTracker</span>上可运行的最大Map/Reduce任务数,这些任务将同时各自运行。
+		      </td>
+		      <td colspan="1" rowspan="1">
+		        默认为2(2个map和2个reduce),可依据硬件情况更改。
+		      </td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">dfs.hosts/dfs.hosts.exclude</td>
+		      <td colspan="1" rowspan="1">许可/拒绝DataNode列表。</td>
+		      <td colspan="1" rowspan="1">
+		        如有必要,用这个文件控制许可的datanode列表。
+		      </td>
+		    
+</tr>
+		    
+<tr>
+		      
+<td colspan="1" rowspan="1">mapred.hosts/mapred.hosts.exclude</td>
+		      <td colspan="1" rowspan="1">许可/拒绝TaskTracker列表。</td>
+		      <td colspan="1" rowspan="1">
+		        如有必要,用这个文件控制许可的TaskTracker列表。
+		      </td>
+  		    
+</tr>
+		  
+</table>
+<p>通常,上述参数被标记为 
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#FinalParams">
+          final</a> 以确保它们不被用户应用更改。
+          </p>
+<a name="N1020C"></a><a name="%E7%8E%B0%E5%AE%9E%E4%B8%96%E7%95%8C%E7%9A%84%E9%9B%86%E7%BE%A4%E9%85%8D%E7%BD%AE"></a>
+<h5>现实世界的集群配置</h5>
+<p>这节罗列在大规模集群上运行<em>sort</em>基准测试(benchmark)时使用到的一些非缺省配置。</p>
+<ul>
+              
+<li>
+                
+<p>运行sort900的一些非缺省配置值,sort900即在900个节点的集群上对9TB的数据进行排序:</p>
+                
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+  		          
+<tr>
+		            
+<th colspan="1" rowspan="1">参数</th>
+		            <th colspan="1" rowspan="1">取值</th> 
+		            <th colspan="1" rowspan="1">备注</th>
+		          
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">dfs.block.size</td>
+                    <td colspan="1" rowspan="1">134217728</td>
+                    <td colspan="1" rowspan="1">针对大文件系统,HDFS的块大小取128MB。</td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">dfs.namenode.handler.count</td>
+                    <td colspan="1" rowspan="1">40</td>
+                    <td colspan="1" rowspan="1">
+                      启动更多的NameNode服务线程去处理来自大量DataNode的RPC请求。
+                    </td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td>
+                    <td colspan="1" rowspan="1">20</td>
+                    <td colspan="1" rowspan="1">
+			reduce启动更多的并行拷贝器以获取大量map的输出。
+                    </td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">mapred.child.java.opts</td>
+                    <td colspan="1" rowspan="1">-Xmx512M</td>
+                    <td colspan="1" rowspan="1">
+			为map/reduce子虚拟机使用更大的堆。 
+                    </td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">fs.inmemory.size.mb</td>
+                    <td colspan="1" rowspan="1">200</td>
+                    <td colspan="1" rowspan="1">
+                      为reduce阶段合并map输出所需的内存文件系统分配更多的内存。
+                    </td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">io.sort.factor</td>
+                    <td colspan="1" rowspan="1">100</td>
+                    <td colspan="1" rowspan="1">文件排序时更多的流将同时被归并。</td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">io.sort.mb</td>
+                    <td colspan="1" rowspan="1">200</td>
+                    <td colspan="1" rowspan="1">提高排序时的内存上限。</td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">io.file.buffer.size</td>
+                    <td colspan="1" rowspan="1">131072</td>
+                    <td colspan="1" rowspan="1">SequenceFile中用到的读/写缓存大小。</td>
+                  
+</tr>
+                
+</table>
+              
+</li>
+              
+<li>
+                
+<p>运行sort1400和sort2000时需要更新的配置,即在1400个节点上对14TB的数据进行排序和在2000个节点上对20TB的数据进行排序:</p>
+                
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+  		          
+<tr>
+		            
+<th colspan="1" rowspan="1">参数</th>
+		            <th colspan="1" rowspan="1">取值</th> 
+		            <th colspan="1" rowspan="1">备注</th>
+		          
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">mapred.job.tracker.handler.count</td>
+                    <td colspan="1" rowspan="1">60</td>
+                    <td colspan="1" rowspan="1">
+                      启用更多的JobTracker服务线程去处理来自大量TaskTracker的RPC请求。
+                    </td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">mapred.reduce.parallel.copies</td>
+                    <td colspan="1" rowspan="1">50</td>
+                    <td colspan="1" rowspan="1"></td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">tasktracker.http.threads</td>
+                    <td colspan="1" rowspan="1">50</td>
+                    <td colspan="1" rowspan="1">
+                      为TaskTracker的Http服务启用更多的工作线程。reduce通过Http服务获取map的中间输出。
+                    </td>
+                  
+</tr>
+                  
+<tr>
+                    
+<td colspan="1" rowspan="1">mapred.child.java.opts</td>
+                    <td colspan="1" rowspan="1">-Xmx1024M</td>
+                    <td colspan="1" rowspan="1">使用更大的堆用于maps/reduces的子虚拟机</td>
+                  
+</tr>
+                
+</table>
+              
+</li>
+            
+</ul>
+<a name="N1032A"></a><a name="Slaves"></a>
+<h4>Slaves</h4>
+<p>通常,你选择集群中的一台机器作为<span class="codefrag">NameNode</span>,另外一台不同的机器作为<span class="codefrag">JobTracker</span>。余下的机器即作为<span class="codefrag">DataNode</span>又作为<span class="codefrag">TaskTracker</span>,这些被称之为<em>slaves</em>。</p>
+<p>在<span class="codefrag">conf/slaves</span>文件中列出所有slave的主机名或者IP地址,一行一个。</p>
+<a name="N10349"></a><a name="%E6%97%A5%E5%BF%97"></a>
+<h4>日志</h4>
+<p>Hadoop使用<a href="http://logging.apache.org/log4j/">Apache log4j</a>来记录日志,它由<a href="http://commons.apache.org/logging/">Apache Commons Logging</a>框架来实现。编辑<span class="codefrag">conf/log4j.properties</span>文件可以改变Hadoop守护进程的日志配置(日志格式等)。</p>
+<a name="N1035D"></a><a name="%E5%8E%86%E5%8F%B2%E6%97%A5%E5%BF%97"></a>
+<h5>历史日志</h5>
+<p>作业的历史文件集中存放在<span class="codefrag">hadoop.job.history.location</span>,这个也可以是在分布式文件系统下的路径,其默认值为<span class="codefrag">${HADOOP_LOG_DIR}/history</span>。jobtracker的web UI上有历史日志的web UI链接。</p>
+<p>历史文件在用户指定的目录<span class="codefrag">hadoop.job.history.user.location</span>也会记录一份,这个配置的缺省值为作业的输出目录。这些文件被存放在指定路径下的&ldquo;_logs/history/&rdquo;目录中。因此,默认情况下日志文件会在&ldquo;mapred.output.dir/_logs/history/&rdquo;下。如果将<span class="codefrag">hadoop.job.history.user.location</span>指定为值<span class="codefrag">none</span>,系统将不再记录此日志。</p>
+<p>用户可使用以下命令在指定路径下查看历史日志汇总<br>
+            
+<span class="codefrag">$ bin/hadoop job -history output-dir</span>
+<br> 
+            这条命令会显示作业的细节信息,失败和终止的任务细节。 <br>
+            关于作业的更多细节,比如成功的任务,以及对每个任务的所做的尝试次数等可以用下面的命令查看<br>
+            
+<span class="codefrag">$ bin/hadoop job -history all output-dir</span>
+<br>
+</p>
+<p>一但全部必要的配置完成,将这些文件分发到所有机器的<span class="codefrag">HADOOP_CONF_DIR</span>路径下,通常是<span class="codefrag">${HADOOP_HOME}/conf</span>。</p>
+</div>
+    
+    
+<a name="N10395"></a><a name="Hadoop%E7%9A%84%E6%9C%BA%E6%9E%B6%E6%84%9F%E7%9F%A5"></a>
+<h2 class="h3">Hadoop的机架感知</h2>
+<div class="section">
+<p>HDFS和Map/Reduce的组件是能够感知机架的。</p>
+<p>
+<span class="codefrag">NameNode</span>和<span class="codefrag">JobTracker</span>通过调用管理员配置模块中的API<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/net/DNSToSwitchMapping.html#resolve(java.util.List)">resolve</a>来获取集群里每个slave的<span class="codefrag">机架id</span>。该API将slave的DNS名称(或者IP地址)转换成机架id。使用哪个模块是通过配置项<span class="codefrag">topology.node.switch.mapping.impl</span>来指定的。模块的默认实现会调用<span class="codefrag">topology.script.file.name</span>配置项指定的一个的脚本/命令。 如果topology.script.file.name未被设置,对于所有传入的IP地址,模块会返回<span class="codefrag">/default-rack</span>作为机架id。在Map/Reduce部分还有一个额外的配置项<span class="codefrag">mapred.cache.task.levels</span>,该参数决定cache的级数(在网络拓扑中)。例如,如果默认值是2,会建立两级的cache- 一级针对主机(主机 -&gt; 任务的映射)另一级针对机架(机架 -&gt; 任务的映射)。
+      </p>
+</div>
+    
+    
+<a name="N103BA"></a><a name="%E5%90%AF%E5%8A%A8Hadoop"></a>
+<h2 class="h3">启动Hadoop</h2>
+<div class="section">
+<p>启动Hadoop集群需要启动HDFS集群和Map/Reduce集群。</p>
+<p>
+        格式化一个新的分布式文件系统:<br>
+        
+<span class="codefrag">$ bin/hadoop namenode -format</span>
+      
+</p>
+<p>
+	在分配的<span class="codefrag">NameNode</span>上,运行下面的命令启动HDFS:<br>
+        
+<span class="codefrag">$ bin/start-dfs.sh</span>
+      
+</p>
+<p>
+<span class="codefrag">bin/start-dfs.sh</span>脚本会参照<span class="codefrag">NameNode</span>上<span class="codefrag">${HADOOP_CONF_DIR}/slaves</span>文件的内容,在所有列出的slave上启动<span class="codefrag">DataNode</span>守护进程。</p>
+<p>
+	在分配的<span class="codefrag">JobTracker</span>上,运行下面的命令启动Map/Reduce:<br>
+        
+<span class="codefrag">$ bin/start-mapred.sh</span>
+      
+</p>
+<p>
+<span class="codefrag">bin/start-mapred.sh</span>脚本会参照<span class="codefrag">JobTracker</span>上<span class="codefrag">${HADOOP_CONF_DIR}/slaves</span>文件的内容,在所有列出的slave上启动<span class="codefrag">TaskTracker</span>守护进程。</p>
+</div>
+    
+    
+<a name="N103FE"></a><a name="%E5%81%9C%E6%AD%A2Hadoop"></a>
+<h2 class="h3">停止Hadoop</h2>
+<div class="section">
+<p>
+	在分配的<span class="codefrag">NameNode</span>上,执行下面的命令停止HDFS:<br>
+        
+<span class="codefrag">$ bin/stop-dfs.sh</span>
+      
+</p>
+<p>
+<span class="codefrag">bin/stop-dfs.sh</span>脚本会参照<span class="codefrag">NameNode</span>上<span class="codefrag">${HADOOP_CONF_DIR}/slaves</span>文件的内容,在所有列出的slave上停止<span class="codefrag">DataNode</span>守护进程。</p>
+<p>
+	在分配的<span class="codefrag">JobTracker</span>上,运行下面的命令停止Map/Reduce:<br>
+        
+<span class="codefrag">$ bin/stop-mapred.sh</span>
+<br>
+      
+</p>
+<p>
+<span class="codefrag">bin/stop-mapred.sh</span>脚本会参照<span class="codefrag">JobTracker</span>上<span class="codefrag">${HADOOP_CONF_DIR}/slaves</span>文件的内容,在所有列出的slave上停止<span class="codefrag">TaskTracker</span>守护进程。</p>
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>
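
The cluster_setup.html page added above walks through the site-specific parameters (fs.default.name, mapred.job.tracker, dfs.name.dir, dfs.data.dir, and the rest) that go into conf/hadoop-site.xml. As a quick illustration of how those settings fit together, here is a minimal hadoop-site.xml sketch in the 0.18 style the page describes; all host names and local paths are placeholders:

    <?xml version="1.0"?>
    <!-- Minimal cluster-specific configuration; values are illustrative only. -->
    <configuration>
      <property>
        <name>fs.default.name</name>
        <value>hdfs://namenode.example.com:9000/</value>
        <final>true</final> <!-- keep user jobs from overriding it -->
      </property>
      <property>
        <name>mapred.job.tracker</name>
        <value>jobtracker.example.com:9001</value>
        <final>true</final>
      </property>
      <property>
        <name>dfs.name.dir</name>
        <!-- comma-separated list: the name table is replicated into every directory -->
        <value>/data/1/dfs/name,/data/2/dfs/name</value>
      </property>
      <property>
        <name>dfs.data.dir</name>
        <!-- comma-separated list: blocks are spread across all listed directories -->
        <value>/data/1/dfs/data,/data/2/dfs/data</value>
      </property>
    </configuration>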

+ 209 - 0
docs/cn/cluster_setup.pdf

These changes were not shown because they are too large.


+ 1116 - 0
docs/cn/commands_manual.html

@@ -0,0 +1,1116 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>命令手册</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">命令手册</div>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="commands_manual.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>命令手册</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E6%A6%82%E8%BF%B0">概述</a>
+<ul class="minitoc">
+<li>
+<a href="#%E5%B8%B8%E8%A7%84%E9%80%89%E9%A1%B9">常规选项</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E7%94%A8%E6%88%B7%E5%91%BD%E4%BB%A4"> 用户命令 </a>
+<ul class="minitoc">
+<li>
+<a href="#archive"> archive </a>
+</li>
+<li>
+<a href="#distcp"> distcp </a>
+</li>
+<li>
+<a href="#fs"> fs </a>
+</li>
+<li>
+<a href="#fsck"> fsck </a>
+</li>
+<li>
+<a href="#jar"> jar </a>
+</li>
+<li>
+<a href="#job"> job </a>
+</li>
+<li>
+<a href="#pipes"> pipes </a>
+</li>
+<li>
+<a href="#version"> version </a>
+</li>
+<li>
+<a href="#CLASSNAME"> CLASSNAME </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E7%AE%A1%E7%90%86%E5%91%BD%E4%BB%A4">管理命令</a>
+<ul class="minitoc">
+<li>
+<a href="#balancer"> balancer </a>
+</li>
+<li>
+<a href="#daemonlog"> daemonlog </a>
+</li>
+<li>
+<a href="#datanode"> datanode</a>
+</li>
+<li>
+<a href="#dfsadmin"> dfsadmin </a>
+</li>
+<li>
+<a href="#jobtracker"> jobtracker </a>
+</li>
+<li>
+<a href="#namenode"> namenode </a>
+</li>
+<li>
+<a href="#secondarynamenode"> secondarynamenode </a>
+</li>
+<li>
+<a href="#tasktracker"> tasktracker </a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+		
+<a name="N1000D"></a><a name="%E6%A6%82%E8%BF%B0"></a>
+<h2 class="h3">概述</h2>
+<div class="section">
+<p>
+				所有的hadoop命令均由bin/hadoop脚本引发。不指定参数运行hadoop脚本会打印所有命令的描述。
+			</p>
+<p>
+				
+<span class="codefrag">用法:hadoop [--config confdir] [COMMAND] [GENERIC_OPTIONS] [COMMAND_OPTIONS]</span>
+			
+</p>
+<p>
+				Hadoop有一个选项解析框架用于解析一般的选项和运行类。
+			</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述 </th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">--config confdir</span></td>
+			            <td colspan="1" rowspan="1">覆盖缺省配置目录。缺省是${HADOOP_HOME}/conf。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">GENERIC_OPTIONS</span></td>
+			            <td colspan="1" rowspan="1">多个命令都支持的通用选项。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">COMMAND</span>
+<br>
+<span class="codefrag">命令选项S</span></td>
+			            <td colspan="1" rowspan="1">各种各样的命令和它们的选项会在下面提到。这些命令被分为
+			             <a href="commands_manual.html#%E7%94%A8%E6%88%B7%E5%91%BD%E4%BB%A4">用户命令</a> 
+			             <a href="commands_manual.html#%E7%AE%A1%E7%90%86%E5%91%BD%E4%BB%A4">管理命令</a>两组。</td>
+			           
+</tr>
+			     
+</table>
+<a name="N10061"></a><a name="%E5%B8%B8%E8%A7%84%E9%80%89%E9%A1%B9"></a>
+<h3 class="h4">常规选项</h3>
+<p>
+				  下面的选项被
+				  <a href="commands_manual.html#dfsadmin">dfsadmin</a>, 
+				  <a href="commands_manual.html#fs">fs</a>, <a href="commands_manual.html#fsck">fsck</a>和 
+				  <a href="commands_manual.html#job">job</a>支持。 
+				  应用程序要实现
+				  <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/Tool.html">Tool</a>来支持
+				  <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/GenericOptionsParser.html">
+				  常规选项</a>。
+				</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> GENERIC_OPTION </th><th colspan="1" rowspan="1"> 描述 </th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-conf &lt;configuration file&gt;</span></td>
+			            <td colspan="1" rowspan="1">指定应用程序的配置文件。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-D &lt;property=value&gt;</span></td>
+			            <td colspan="1" rowspan="1">为指定property指定值value。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-fs &lt;local|namenode:port&gt;</span></td>
+			            <td colspan="1" rowspan="1">指定namenode。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-jt &lt;local|jobtracker:port&gt;</span></td>
+			            <td colspan="1" rowspan="1">指定job tracker。只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-files &lt;逗号分隔的文件列表&gt;</span></td>
+			            <td colspan="1" rowspan="1">指定要拷贝到map reduce集群的文件的逗号分隔的列表。
+			            只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-libjars &lt;逗号分隔的jar列表&gt;</span></td>
+			            <td colspan="1" rowspan="1">指定要包含到classpath中的jar文件的逗号分隔的列表。
+			            只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-archives &lt;逗号分隔的archive列表&gt;</span></td>
+			            <td colspan="1" rowspan="1">指定要被解压到计算节点上的档案文件的逗号分割的列表。
+			            只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           
+</tr>
+				
+</table>
+</div>
+		
+		
+<a name="N10103"></a><a name="%E7%94%A8%E6%88%B7%E5%91%BD%E4%BB%A4"></a>
+<h2 class="h3"> 用户命令 </h2>
+<div class="section">
+<p>hadoop集群用户的常用命令。</p>
+<a name="N1010C"></a><a name="archive"></a>
+<h3 class="h4"> archive </h3>
+<p>
+					创建一个hadoop档案文件。参考 <a href="hadoop_archives.html">Hadoop Archives</a>.
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop archive -archiveName NAME &lt;src&gt;* &lt;dest&gt;</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+					   
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-archiveName NAME</span></td>
+			            <td colspan="1" rowspan="1">要创建的档案的名字。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">src</span></td>
+			            <td colspan="1" rowspan="1">文件系统的路径名,和通常含正则表达的一样。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">dest</span></td>
+			            <td colspan="1" rowspan="1">保存档案文件的目标目录。</td>
+			           
+</tr>
+			     
+</table>
+<a name="N10157"></a><a name="distcp"></a>
+<h3 class="h4"> distcp </h3>
+<p>
+					递归地拷贝文件或目录。参考<a href="distcp.html">DistCp指南</a>以获取等多信息。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop distcp &lt;srcurl&gt; &lt;desturl&gt;</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">srcurl</span></td>
+			            <td colspan="1" rowspan="1">源Url</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">desturl</span></td>
+			            <td colspan="1" rowspan="1">目标Url</td>
+			           
+</tr>
+			     
+</table>
+<a name="N10194"></a><a name="fs"></a>
+<h3 class="h4"> fs </h3>
+<p>
+					
+<span class="codefrag">用法:hadoop fs [</span><a href="commands_manual.html#%E5%B8%B8%E8%A7%84%E9%80%89%E9%A1%B9">GENERIC_OPTIONS</a><span class="codefrag">] 
+					[COMMAND_OPTIONS]</span>
+				
+</p>
+<p>
+					运行一个常规的文件系统客户端。
+				</p>
+<p>
+					各种命令选项可以参考<a href="hdfs_shell.html">HDFS Shell指南</a>。
+				</p>
+<a name="N101B0"></a><a name="fsck"></a>
+<h3 class="h4"> fsck </h3>
+<p>
+					运行HDFS文件系统检查工具。参考<a href="hdfs_user_guide.html#fsck">Fsck</a>了解更多。
+				</p>
+<p>
+<span class="codefrag">用法:hadoop fsck [</span><a href="commands_manual.html#%E5%B8%B8%E8%A7%84%E9%80%89%E9%A1%B9">GENERIC_OPTIONS</a><span class="codefrag">] 
+				&lt;path&gt; [-move | -delete | -openforwrite] [-files [-blocks 
+				[-locations | -racks]]]</span>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述 </th>
+</tr>
+			          
+<tr>
+			            
+<td colspan="1" rowspan="1"><span class="codefrag">&lt;path&gt;</span></td>
+			            <td colspan="1" rowspan="1">检查的起始目录。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-move</span></td>
+			            <td colspan="1" rowspan="1">移动受损文件到/lost+found</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-delete</span></td>
+			            <td colspan="1" rowspan="1">删除受损文件。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-openforwrite</span></td>
+			            <td colspan="1" rowspan="1">打印出写打开的文件。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-files</span></td>
+			            <td colspan="1" rowspan="1">打印出正被检查的文件。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-blocks</span></td>
+			            <td colspan="1" rowspan="1">打印出块信息报告。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-locations</span></td>
+			            <td colspan="1" rowspan="1">打印出每个块的位置信息。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-racks</span></td>
+			            <td colspan="1" rowspan="1">打印出data-node的网络拓扑结构。</td>
+			           
+</tr>
+					
+</table>
+<a name="N10244"></a><a name="jar"></a>
+<h3 class="h4"> jar </h3>
+<p>
+					运行jar文件。用户可以把他们的Map Reduce代码捆绑到jar文件中,使用这个命令执行。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop jar &lt;jar&gt; [mainClass] args...</span>
+				
+</p>
+<p>
+					streaming作业是通过这个命令执行的。参考<a href="streaming.html#%E5%85%B6%E4%BB%96%E4%BE%8B%E5%AD%90">Streaming examples</a>中的例子。
+				</p>
+<p>
+					Word count例子也是通过jar命令运行的。参考<a href="mapred_tutorial.html#%E7%94%A8%E6%B3%95">Wordcount example</a>。
+				</p>
+<a name="N10262"></a><a name="job"></a>
+<h3 class="h4"> job </h3>
+<p>
+					用于和Map Reduce作业交互和命令。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop job [</span><a href="commands_manual.html#%E5%B8%B8%E8%A7%84%E9%80%89%E9%A1%B9">GENERIC_OPTIONS</a><span class="codefrag">] 
+					[-submit &lt;job-file&gt;] | [-status &lt;job-id&gt;] | 
+					[-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;] | [-kill &lt;job-id&gt;] | 
+					[-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;] | [-history [all] &lt;jobOutputDir&gt;] |
+					[-list [all]] | [-kill-task &lt;task-id&gt;] | [-fail-task &lt;task-id&gt;]</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-submit &lt;job-file&gt;</span></td>
+			            <td colspan="1" rowspan="1">提交作业</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-status &lt;job-id&gt;</span></td>
+			            <td colspan="1" rowspan="1">打印map和reduce完成百分比和所有计数器。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;</span></td>
+			            <td colspan="1" rowspan="1">打印计数器的值。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-kill &lt;job-id&gt;</span></td>
+			            <td colspan="1" rowspan="1">杀死指定作业。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;</span></td>
+			            <td colspan="1" rowspan="1">打印给定范围内jobtracker接收到的事件细节。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-history [all] &lt;jobOutputDir&gt;</span></td>
+			            <td colspan="1" rowspan="1">-history &lt;jobOutputDir&gt; 打印作业的细节、失败及被杀死原因的细节。更多的关于一个作业的细节比如成功的任务,做过的任务尝试等信息可以通过指定[all]选项查看。
+			            </td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-list [all]</span></td>
+			            <td colspan="1" rowspan="1">-list all显示所有作业。-list只显示将要完成的作业。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-kill-task &lt;task-id&gt;</span></td>
+			            <td colspan="1" rowspan="1">杀死任务。被杀死的任务不会不利于失败尝试。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-fail-task &lt;task-id&gt;</span></td>
+			            <td colspan="1" rowspan="1">使任务失败。被失败的任务会对失败尝试不利。</td>
+			           
+</tr>
+					
+</table>
+<a name="N10302"></a><a name="pipes"></a>
+<h3 class="h4"> pipes </h3>
+<p>
+					运行pipes作业。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop pipes [-conf &lt;path&gt;] [-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...] 
+					[-input &lt;path&gt;] [-output &lt;path&gt;] [-jar &lt;jar file&gt;] [-inputformat &lt;class&gt;] 
+					[-map &lt;class&gt;] [-partitioner &lt;class&gt;] [-reduce &lt;class&gt;] [-writer &lt;class&gt;] 
+					[-program &lt;executable&gt;] [-reduces &lt;num&gt;] </span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			          
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-conf &lt;path&gt;</span></td>
+			            <td colspan="1" rowspan="1">作业的配置</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...</span></td>
+			            <td colspan="1" rowspan="1">增加/覆盖作业的配置项</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-input &lt;path&gt;</span></td>
+			            <td colspan="1" rowspan="1">输入目录</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-output &lt;path&gt;</span></td>
+			            <td colspan="1" rowspan="1">输出目录</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-jar &lt;jar file&gt;</span></td>
+			            <td colspan="1" rowspan="1">Jar文件名</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-inputformat &lt;class&gt;</span></td>
+			            <td colspan="1" rowspan="1">InputFormat类</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-map &lt;class&gt;</span></td>
+			            <td colspan="1" rowspan="1">Java Map类</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-partitioner &lt;class&gt;</span></td>
+			            <td colspan="1" rowspan="1">Java Partitioner</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-reduce &lt;class&gt;</span></td>
+			            <td colspan="1" rowspan="1">Java Reduce类</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-writer &lt;class&gt;</span></td>
+			            <td colspan="1" rowspan="1">Java RecordWriter</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-program &lt;executable&gt;</span></td>
+			            <td colspan="1" rowspan="1">可执行程序的URI</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-reduces &lt;num&gt;</span></td>
+			            <td colspan="1" rowspan="1">reduce个数</td>
+			           
+</tr>
+					
+</table>
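+<p>
+					下面是一个提交pipes作业的示意(其中的路径和可执行程序均为假设的例子):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop pipes -conf word.xml -input in-dir -output out-dir -program bin/wordcount</span>
+				
+</p>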
+<a name="N103C7"></a><a name="version"></a>
+<h3 class="h4"> version </h3>
+<p>
+					打印版本信息。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop version</span>
+				
+</p>
+<a name="N103D7"></a><a name="CLASSNAME"></a>
+<h3 class="h4"> CLASSNAME </h3>
+<p>
+					 hadoop脚本可用于调用任何类。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop CLASSNAME</span>
+				
+</p>
+<p>
+					 运行名字为CLASSNAME的类。
+				</p>
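+<p>
+					 例如,可以直接运行一个带main方法的类(这里以打印版本信息的org.apache.hadoop.util.VersionInfo类为例,仅作示意):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop org.apache.hadoop.util.VersionInfo</span>
+				
+</p>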
+</div>
+		
+		
+<a name="N103EB"></a><a name="%E7%AE%A1%E7%90%86%E5%91%BD%E4%BB%A4"></a>
+<h2 class="h3">管理命令</h2>
+<div class="section">
+<p>hadoop集群管理员常用的命令。</p>
+<a name="N103F4"></a><a name="balancer"></a>
+<h3 class="h4"> balancer </h3>
+<p>
+					运行集群平衡工具。管理员只需按Ctrl-C即可停止平衡过程。参考<a href="hdfs_user_guide.html#Rebalancer">Rebalancer</a>了解更多。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop balancer [-threshold &lt;threshold&gt;]</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-threshold &lt;threshold&gt;</span></td>
+			            <td colspan="1" rowspan="1">磁盘容量的百分比。这会覆盖缺省的阈值。</td>
+			           
+</tr>
+			     
+</table>
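+<p>
+					例如,把平衡阈值设为磁盘容量的5%(仅作示意):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop balancer -threshold 5</span>
+				
+</p>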
+<a name="N10423"></a><a name="daemonlog"></a>
+<h3 class="h4"> daemonlog </h3>
+<p>
+					 获取或设置每个守护进程的日志级别。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop daemonlog  -getlevel &lt;host:port&gt; &lt;name&gt;</span>
+<br>
+					
+<span class="codefrag">用法:hadoop daemonlog  -setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-getlevel &lt;host:port&gt; &lt;name&gt;</span></td>
+			            <td colspan="1" rowspan="1">打印运行在&lt;host:port&gt;的守护进程的日志级别。这个命令内部会连接http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</span></td>
+			            <td colspan="1" rowspan="1">设置运行在&lt;host:port&gt;的守护进程的日志级别。这个命令内部会连接http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
+			           
+</tr>
+			     
+</table>
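+<p>
+					下面是一个查询并调整日志级别的示意(主机名、端口和日志名称均为假设的例子,具体日志名称取决于所运行的Hadoop版本):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop daemonlog -getlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode</span>
+<br>
+					
+<span class="codefrag">bash$ hadoop daemonlog -setlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode DEBUG</span>
+				
+</p>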
+<a name="N10460"></a><a name="datanode"></a>
+<h3 class="h4"> datanode</h3>
+<p>
+					运行一个HDFS的datanode。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop datanode [-rollback]</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-rollback</span></td>
+			            <td colspan="1" rowspan="1">将datanode回滚到前一个版本。这需要在停止datanode,分发老的hadoop版本之后使用。
+			            </td>
+			           
+</tr>
+			     
+</table>
+<a name="N1048B"></a><a name="dfsadmin"></a>
+<h3 class="h4"> dfsadmin </h3>
+<p>
+					运行一个HDFS的dfsadmin客户端。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop dfsadmin  [</span><a href="commands_manual.html#%E5%B8%B8%E8%A7%84%E9%80%89%E9%A1%B9">GENERIC_OPTIONS</a><span class="codefrag">] [-report] [-safemode enter | leave | get | wait] [-refreshNodes]
+					 [-finalizeUpgrade] [-upgradeProgress status | details | force] [-metasave filename] 
+					 [-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;] [-clrQuota &lt;dirname&gt;...&lt;dirname&gt;] 
+					 [-help [cmd]]</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-report</span></td>
+			            <td colspan="1" rowspan="1">报告文件系统的基本信息和统计信息。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-safemode enter | leave | get | wait</span></td>
+			            <td colspan="1" rowspan="1">安全模式维护命令。安全模式是Namenode的一个状态,这种状态下,Namenode <br>
+					1.  不接受对名字空间的更改(只读)<br> 
+					2.  不复制或删除块<br>
+					Namenode会在启动时自动进入安全模式,当满足最小副本数条件的块达到配置的最小百分比时,会自动离开安全模式。安全模式也可以手动进入,但此时必须手动关闭。
+                </td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-refreshNodes</span></td>
+			            <td colspan="1" rowspan="1">重新读取hosts和exclude文件,更新允许连接到Namenode的Datanode集合,以及需要退役或重新加入的Datanode集合。
+                </td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-finalizeUpgrade</span></td>
+			            <td colspan="1" rowspan="1">终结HDFS的升级操作。Datanode删除前一个版本的工作目录,之后Namenode也这样做。这个操作完结整个升级过程。
+                </td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-upgradeProgress status | details | force</span></td>
+			            <td colspan="1" rowspan="1">请求当前系统的升级状态,状态的细节,或者强制升级操作进行。
+                </td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-metasave filename</span></td>
+			            <td colspan="1" rowspan="1">保存Namenode的主要数据结构到hadoop.log.dir属性指定的目录下的&lt;filename&gt;文件。对于下面的每一项,&lt;filename&gt;中都会一行内容与之对应<br>
+                        1. Namenode收到的Datanode的心跳信号<br>
+                        2. 等待被复制的块<br>
+                        3. 正在被复制的块<br>
+                        4. 等待被删除的块</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;</span></td>
+			            <td colspan="1" rowspan="1">为每个目录 &lt;dirname&gt;设定配额&lt;quota&gt;。目录配额是一个长整型整数,强制限定了目录树下的名字个数。<br>
+                命令会尽力在每个目录上执行,以下情况会报错:<br>
+                1. 配额&lt;quota&gt;不是一个正整数,或者<br>
+                2. 用户不是管理员,或者<br>
+                3. 这个目录不存在或是文件,或者<br>
+                4. 目录会马上超出新设定的配额。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-clrQuota &lt;dirname&gt;...&lt;dirname&gt;</span></td>
+			            <td colspan="1" rowspan="1">为每一个目录&lt;dirname&gt;清除配额设定。<br>
+                命令会尽力在每个目录上执行,以下情况会报错:<br>
+                1. 这个目录不存在或是文件,或者<br>
+                2. 用户不是管理员。<br>
+                如果目录原来没有配额不会报错。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-help [cmd]</span></td>
+			            <td colspan="1" rowspan="1">显示给定命令的帮助信息,如果没有给定命令,则显示所有命令的帮助信息。</td>
+			           
+</tr>
+			     
+</table>
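+<p>
+					例如,查看集群报告、查询安全模式状态,或为某个目录设置名字配额(目录路径仅为假设的例子):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop dfsadmin -report</span>
+<br>
+					
+<span class="codefrag">bash$ hadoop dfsadmin -safemode get</span>
+<br>
+					
+<span class="codefrag">bash$ hadoop dfsadmin -setQuota 100 /user/someuser</span>
+				
+</p>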
+<a name="N1054B"></a><a name="jobtracker"></a>
+<h3 class="h4"> jobtracker </h3>
+<p>
+					运行MapReduce job Tracker节点。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop jobtracker</span>
+				
+</p>
+<a name="N1055B"></a><a name="namenode"></a>
+<h3 class="h4"> namenode </h3>
+<p>
+					运行namenode。有关升级,回滚,升级终结的更多信息请参考<a href="hdfs_user_guide.html#%E5%8D%87%E7%BA%A7%E5%92%8C%E5%9B%9E%E6%BB%9A">升级和回滚</a>。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-format</span></td>
+			            <td colspan="1" rowspan="1">格式化namenode。它启动namenode,格式化namenode,之后关闭namenode。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-upgrade</span></td>
+			            <td colspan="1" rowspan="1">分发新版本的hadoop后,namenode应以upgrade选项启动。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-rollback</span></td>
+			            <td colspan="1" rowspan="1">将namenode回滚到前一版本。这个选项要在停止集群,分发老的hadoop版本后使用。
+			            </td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-finalize</span></td>
+			            <td colspan="1" rowspan="1">finalize会删除文件系统的前一状态。最近的升级会被持久化,rollback选项将不再可用。升级终结操作之后,它会停掉namenode。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-importCheckpoint</span></td>
+			            <td colspan="1" rowspan="1">从检查点目录装载镜像并保存到当前的镜像目录中,检查点目录由fs.checkpoint.dir属性指定。
+			            </td>
+			           
+</tr>
+			     
+</table>
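+<p>
+					例如,初次部署时格式化namenode,或在分发新版本的hadoop后以升级方式启动(仅作示意):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop namenode -format</span>
+<br>
+					
+<span class="codefrag">bash$ hadoop namenode -upgrade</span>
+				
+</p>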
+<a name="N105C2"></a><a name="secondarynamenode"></a>
+<h3 class="h4"> secondarynamenode </h3>
+<p>
+					运行HDFS的secondary namenode。参考<a href="hdfs_user_guide.html#Secondary+NameNode">Secondary Namenode</a>了解更多。 
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]</span>
+				
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+			          
+<tr>
+<th colspan="1" rowspan="1"> 命令选项 </th><th colspan="1" rowspan="1"> 描述</th>
+</tr>
+			
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-checkpoint [force]</span></td>
+			            <td colspan="1" rowspan="1">如果EditLog的大小 &gt;= fs.checkpoint.size,启动Secondary namenode的检查点过程。
+			            如果使用了-force,将不考虑EditLog的大小。</td>
+			           
+</tr>
+			           
+<tr>
+			          	
+<td colspan="1" rowspan="1"><span class="codefrag">-geteditsize</span></td>
+			            <td colspan="1" rowspan="1">打印EditLog大小。</td>
+			           
+</tr>
+			     
+</table>
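+<p>
+					例如,查看当前EditLog的大小,或强制执行一次检查点(仅作示意):
+				</p>
+<p>
+					
+<span class="codefrag">bash$ hadoop secondarynamenode -geteditsize</span>
+<br>
+					
+<span class="codefrag">bash$ hadoop secondarynamenode -checkpoint force</span>
+				
+</p>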
+<a name="N105FF"></a><a name="tasktracker"></a>
+<h3 class="h4"> tasktracker </h3>
+<p>
+					运行MapReduce的task Tracker节点。
+				</p>
+<p>
+					
+<span class="codefrag">用法:hadoop tasktracker</span>
+				
+</p>
+</div>
+		
+		
+		      
+
+	
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 261 - 0
docs/cn/commands_manual.pdf


+ 563 - 0
docs/cn/distcp.html

@@ -0,0 +1,563 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>DistCp</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">DistCp使用指南</div>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="distcp.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>DistCp</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E6%A6%82%E8%BF%B0">概述</a>
+</li>
+<li>
+<a href="#%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95">使用方法</a>
+<ul class="minitoc">
+<li>
+<a href="#%E5%9F%BA%E6%9C%AC%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95">基本使用方法</a>
+</li>
+<li>
+<a href="#options">选项</a>
+<ul class="minitoc">
+<li>
+<a href="#%E9%80%89%E9%A1%B9%E7%B4%A2%E5%BC%95">选项索引</a>
+</li>
+<li>
+<a href="#uo">更新和覆盖</a>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>
+<a href="#etc">附录</a>
+<ul class="minitoc">
+<li>
+<a href="#Map%E6%95%B0%E7%9B%AE">Map数目</a>
+</li>
+<li>
+<a href="#cpver">不同HDFS版本间的拷贝</a>
+</li>
+<li>
+<a href="#Map%2FReduce%E5%92%8C%E5%89%AF%E6%95%88%E5%BA%94">Map/Reduce和副效应</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+
+    
+<a name="N1000D"></a><a name="%E6%A6%82%E8%BF%B0"></a>
+<h2 class="h3">概述</h2>
+<div class="section">
+<p>DistCp(分布式拷贝)是用于大规模集群内部和集群之间拷贝的工具。
+	  它使用Map/Reduce实现文件分发,错误处理和恢复,以及报告生成。
+      它把文件和目录的列表作为map任务的输入,每个任务会完成源列表中部分文件的拷贝。
+      由于使用了Map/Reduce方法,这个工具在语义和执行上都会有特殊的地方。
+      这篇文档会为常用DistCp操作提供指南并阐述它的工作模型。
+      </p>
+</div>
+
+    
+<a name="N10017"></a><a name="%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95"></a>
+<h2 class="h3">使用方法</h2>
+<div class="section">
+<a name="N1001D"></a><a name="%E5%9F%BA%E6%9C%AC%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95"></a>
+<h3 class="h4">基本使用方法</h3>
+<p>DistCp最常用在集群之间的拷贝:</p>
+<p>
+<span class="codefrag">bash$ hadoop distcp hdfs://nn1:8020/foo/bar \</span>
+<br>
+           
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn2:8020/bar/foo</span>
+</p>
+<p>这条命令会把nn1集群的<span class="codefrag">/foo/bar</span>目录下的所有文件或目录名展开并存储到一个临时文件中,这些文件内容的拷贝工作被分配给多个map任务,
+        然后每个TaskTracker分别执行从nn1到nn2的拷贝操作。注意DistCp使用绝对路径进行操作。
+        </p>
+<p>命令行中可以指定多个源目录:</p>
+<p>
+<span class="codefrag">bash$ hadoop distcp hdfs://nn1:8020/foo/a \</span>
+<br>
+           
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn1:8020/foo/b \</span>
+<br>
+           
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn2:8020/bar/foo</span>
+</p>
+<p>或者使用<span class="codefrag">-f</span>选项,从文件里获得多个源:<br>
+        
+<span class="codefrag">bash$ hadoop distcp -f hdfs://nn1:8020/srclist \</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;hdfs://nn2:8020/bar/foo</span>
+<br>
+</p>
+<p>其中<span class="codefrag">srclist</span> 的内容是<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</span>
+</p>
+<p>当从多个源拷贝时,如果两个源冲突,DistCp会停止拷贝并提示出错信息,
+        如果在目的位置发生冲突,会根据<a href="#options">选项设置</a>解决。
+        默认情况会跳过已经存在的目标文件(比如不用源文件做替换操作)。每次操作结束时
+        都会报告跳过的文件数目,但是如果某些拷贝操作失败了,但在之后的尝试成功了,
+        那么报告的信息可能不够精确(请参考<a href="#etc">附录</a>)。
+	</p>
+<p>每个TaskTracker都必须能够访问源端和目的端的文件系统并与之交互。
+        对于HDFS来说,源和目的端要运行相同版本的协议或者使用向下兼容的协议。
+        (请参考<a href="#cpver">不同版本间的拷贝</a> )。
+        </p>
+<p>拷贝完成后,建议生成源端和目的端文件的列表,并交叉检查,来确认拷贝真正成功。
+        因为DistCp使用Map/Reduce和文件系统API进行操作,所以这三者或它们之间有任何问题
+        都会影响拷贝操作。部分完成的拷贝有时可以通过再次执行带-update参数的命令来补全,
+        但用户在如此操作之前应该熟悉该选项的语义。
+        </p>
+<p>值得注意的是,当另一个客户端同时在向源文件写入时,拷贝很有可能会失败。
+        尝试覆盖HDFS上正在被写入的文件的操作也会失败。
+        如果一个源文件在拷贝之前被移动或删除了,拷贝会失败并抛出异常
+        FileNotFoundException。</p>
+<a name="N1007B"></a><a name="options"></a>
+<h3 class="h4">选项</h3>
+<a name="N10081"></a><a name="%E9%80%89%E9%A1%B9%E7%B4%A2%E5%BC%95"></a>
+<h4>选项索引</h4>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+<th colspan="1" rowspan="1"> 标识  </th><th colspan="1" rowspan="1"> 描述 </th><th colspan="1" rowspan="1"> 备注 </th>
+</tr>
+
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-p[rbugp]</span></td>
+              <td colspan="1" rowspan="1">Preserve<br>
+                  &nbsp;&nbsp;r: replication number<br>
+                  &nbsp;&nbsp;b: block size<br>
+                  &nbsp;&nbsp;u: user<br>
+                  &nbsp;&nbsp;g: group<br>
+                  &nbsp;&nbsp;p: permission<br>
+</td>
+              <td colspan="1" rowspan="1">修改时间不会被保留。并且当指定
+              <span class="codefrag">-update</span> 时,更新的状态<strong>不</strong>会
+              被同步,除非文件大小不同(比如文件被重新创建)。
+              </td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-i</span></td>
+              <td colspan="1" rowspan="1">忽略失败</td>
+              <td colspan="1" rowspan="1">就像在 <a href="#etc">附录</a>中提到的,这个选项会比默认情况提供关于拷贝的更精确的统计, 同时它还将保留失败拷贝操作的日志,这些日志信息可以用于调试。最后,如果一个map失败了,但并没完成所有分块任务的尝试,这不会导致整个作业的失败。
+              </td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-log &lt;logdir&gt;</span></td>
+              <td colspan="1" rowspan="1">记录日志到 &lt;logdir&gt;</td>
+              <td colspan="1" rowspan="1">DistCp为每个文件的每次尝试拷贝操作都记录日志,并把日志作为map的输出。
+              如果一个map失败了,当重新执行时这个日志不会被保留。
+              </td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-m &lt;num_maps&gt;</span></td>
+              <td colspan="1" rowspan="1">同时拷贝的最大数目</td>
+              <td colspan="1" rowspan="1">指定了拷贝数据时map的数目。请注意并不是map数越多吞吐量越大。
+              </td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-overwrite</span></td>
+              <td colspan="1" rowspan="1">覆盖目标</td>
+              <td colspan="1" rowspan="1">如果一个map失败并且没有使用<span class="codefrag">-i</span>选项,不仅仅那些拷贝失败的文件,这个分块任务中的所有文件都会被重新拷贝。
+			  就像<a href="#uo">下面</a>提到的,它会改变生成目标路径的语义,所以
+              用户要小心使用这个选项。
+              </td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-update</span></td>
+              <td colspan="1" rowspan="1">如果源和目标的大小不一样则进行覆盖</td>
+              <td colspan="1" rowspan="1">像之前提到的,这不是"同步"操作。
+              执行覆盖的唯一标准是源文件和目标文件大小是否相同;如果不同,则源文件替换目标文件。
+              像 <a href="#uo">下面</a>提到的,它也改变生成目标路径的语义,
+              用户使用要小心。
+              </td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">-f &lt;urilist_uri&gt;</span></td>
+              <td colspan="1" rowspan="1">使用&lt;urilist_uri&gt; 作为源文件列表</td>
+              <td colspan="1" rowspan="1">这等价于把所有文件名列在命令行中。
+              <span class="codefrag">urilist_uri</span> 列表应该是完整合法的URI。
+              </td>
+</tr>
+
+        
+</table>
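+<p>这些选项可以组合使用,例如忽略失败、按大小增量更新并记录日志(路径和主机名仅为假设的例子):</p>
+<p>
+<span class="codefrag">bash$ hadoop distcp -i -update -log hdfs://nn2:8020/logs \</span>
+<br>
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo hdfs://nn2:8020/bar</span>
+</p>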
+<a name="N10133"></a><a name="uo"></a>
+<h4>更新和覆盖</h4>
+<p>这里给出一些 <span class="codefrag">-update</span>和 <span class="codefrag">-overwrite</span>的例子。
+        考虑一个从<span class="codefrag">/foo/a</span> 和
+        <span class="codefrag">/foo/b</span> 到 <span class="codefrag">/bar/foo</span>的拷贝,源路径包括:
+        </p>
+<p>
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/aa</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/ab</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ba</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ab</span>
+</p>
+<p>如果设置了<span class="codefrag">-update</span>或 <span class="codefrag">-overwrite</span>选项,
+        被拷贝的是源目录的内容而不是源目录本身,此时每个源目录的内容都会和目标目录的
+         <strong>内容</strong> 做比较,于是两个源都会把一个条目映射到目标端的
+        <span class="codefrag">/bar/foo/ab</span>。
+        DistCp碰到这类冲突的情况会终止操作并退出。</p>
+<p>默认情况下,<span class="codefrag">/bar/foo/a</span> 和
+        <span class="codefrag">/bar/foo/b</span> 目录都会被创建,所以并不会有冲突。</p>
+<p>现在考虑一个使用<span class="codefrag">-update</span>合法的操作:<br>
+        
+<span class="codefrag">distcp -update hdfs://nn1:8020/foo/a \</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              hdfs://nn1:8020/foo/b \</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              hdfs://nn2:8020/bar</span>
+</p>
+<p>其中源路径/大小:</p>
+<p>
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/aa 32</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/ab 32</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ba 64</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/bb 32</span>
+</p>
+<p>和目的路径/大小:</p>
+<p>
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/aa 32</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ba 32</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/bb 64</span>
+</p>
+<p>会产生:</p>
+<p>
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/aa 32</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ab 32</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ba 64</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/bb 32</span>
+</p>
+<p>只有nn2的<span class="codefrag">aa</span>文件没有被覆盖。如果指定了
+        <span class="codefrag">-overwrite</span>选项,所有文件都会被覆盖。
+        </p>
+</div> <!-- Usage -->
+
+    
+<a name="N101E4"></a><a name="etc"></a>
+<h2 class="h3">附录</h2>
+<div class="section">
+<a name="N101EA"></a><a name="Map%E6%95%B0%E7%9B%AE"></a>
+<h3 class="h4">Map数目</h3>
+<p>DistCp会尝试着均分需要拷贝的内容,这样每个map拷贝差不多相等大小的内容。
+	          但因为文件是最小的拷贝粒度,所以配置增加同时拷贝(如map)的数目不一定会增加实际同时拷贝的数目以及总吞吐量。
+          </p>
+<p>如果没使用<span class="codefrag">-m</span>选项,DistCp会尝试在调度工作时指定map的数目
+          为 <span class="codefrag">min (total_bytes / bytes.per.map, 20 * num_task_trackers)</span>,
+		  其中<span class="codefrag">bytes.per.map</span>默认是256MB。</p>
+<p>建议对于长时间运行或定期运行的作业,根据源和目标集群大小、拷贝数量大小以及带宽调整map的数目。
+          </p>
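+<p>举例来说(数字仅为假设):若待拷贝的数据总量为100GB,
+          <span class="codefrag">bytes.per.map</span>取默认值256MB,集群有10个TaskTracker,
+          则map数约为min(100GB / 256MB, 20 * 10) = min(400, 200) = 200;
+          如果希望进一步限制并发,可以显式指定<span class="codefrag">-m</span>:
+          </p>
+<p>
+<span class="codefrag">bash$ hadoop distcp -m 20 hdfs://nn1:8020/foo hdfs://nn2:8020/bar</span>
+</p>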
+<a name="N10203"></a><a name="cpver"></a>
+<h3 class="h4">不同HDFS版本间的拷贝</h3>
+<p>对于不同Hadoop版本间的拷贝,用户应该使用HftpFileSystem。
+        这是一个只读文件系统,所以DistCp必须运行在目标端集群上(更确切地说,是在能够写入目标集群的TaskTracker上)。
+        源的格式是
+        <span class="codefrag">hftp://&lt;dfs.http.address&gt;/&lt;path&gt;</span>
+        (默认情况<span class="codefrag">dfs.http.address</span>是
+        &lt;namenode&gt;:50070)。</p>
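+<p>例如,从一个低版本集群向当前集群拷贝时可以这样执行(主机名仅为假设的例子,且该命令应在目标端集群上运行):</p>
+<p>
+<span class="codefrag">bash$ hadoop distcp hftp://nn1.example.com:50070/foo \</span>
+<br>
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2.example.com:8020/bar</span>
+</p>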
+<a name="N10213"></a><a name="Map%2FReduce%E5%92%8C%E5%89%AF%E6%95%88%E5%BA%94"></a>
+<h3 class="h4">Map/Reduce和副效应</h3>
+<p>像前面提到的,map拷贝输入文件失败时,会带来一些副效应。
+        </p>
+<ul>
+
+          
+<li>除非使用了<span class="codefrag">-i</span>,任务产生的日志会被新的尝试替换掉。
+          </li>
+
+          
+<li>除非使用了<span class="codefrag">-overwrite</span>,如果一个文件已被之前的map成功拷贝,在重新执行的拷贝中它会被标记为
+          "被忽略"。</li>
+
+          
+<li>如果map失败了<span class="codefrag">mapred.map.max.attempts</span>次,剩下的map任务会被终止(除非使用了<span class="codefrag">-i</span>)。
+          </li>
+
+          
+<li>如果<span class="codefrag">mapred.speculative.execution</span>被设置为
+          <span class="codefrag">final</span>并且值为<span class="codefrag">true</span>,则拷贝的结果是未定义的。</li>
+
+        
+</ul>
+</div> <!-- Appendix -->
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Những thay đổi đã bị hủy bỏ vì nó quá lớn
+ 149 - 0
docs/cn/distcp.pdf


+ 1108 - 0
docs/cn/hadoop-default.html

@@ -0,0 +1,1108 @@
+<html>
+<body>
+<table border="1">
+<tr>
+<td>name</td><td>value</td><td>description</td>
+</tr>
+<tr>
+<td><a name="hadoop.tmp.dir">hadoop.tmp.dir</a></td><td>/tmp/hadoop-${user.name}</td><td>A base for other temporary directories.</td>
+</tr>
+<tr>
+<td><a name="hadoop.native.lib">hadoop.native.lib</a></td><td>true</td><td>Should native hadoop libraries, if present, be used.</td>
+</tr>
+<tr>
+<td><a name="hadoop.http.filter.initializers">hadoop.http.filter.initializers</a></td><td></td><td>A comma separated list of class names. Each class in the list 
+  must extend org.apache.hadoop.http.FilterInitializer. The corresponding 
+  Filter will be initialized. Then, the Filter will be applied to all user 
+  facing jsp and servlet web pages.  The ordering of the list defines the 
+  ordering of the filters.</td>
+</tr>
+<tr>
+<td><a name="hadoop.logfile.size">hadoop.logfile.size</a></td><td>10000000</td><td>The max size of each log file</td>
+</tr>
+<tr>
+<td><a name="hadoop.logfile.count">hadoop.logfile.count</a></td><td>10</td><td>The max number of log files</td>
+</tr>
+<tr>
+<td><a name="hadoop.job.history.location">hadoop.job.history.location</a></td><td></td><td> If job tracker is static the history files are stored 
+  in this single well known place. If no value is set here, by default,
+  it is in the local file system at ${hadoop.log.dir}/history.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.job.history.user.location">hadoop.job.history.user.location</a></td><td></td><td> User can specify a location to store the history files of 
+  a particular job. If nothing is specified, the logs are stored in 
+  output directory. The files are stored in "_logs/history/" in the directory.
+  User can stop logging by giving the value "none". 
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.namenode.logging.level">dfs.namenode.logging.level</a></td><td>info</td><td>The logging level for dfs namenode. Other values are "dir"(trace
+namespace mutations), "block"(trace block under/over replications and block
+creations/deletions), or "all".</td>
+</tr>
+<tr>
+<td><a name="io.sort.factor">io.sort.factor</a></td><td>10</td><td>The number of streams to merge at once while sorting
+  files.  This determines the number of open file handles.</td>
+</tr>
+<tr>
+<td><a name="io.sort.mb">io.sort.mb</a></td><td>100</td><td>The total amount of buffer memory to use while sorting 
+  files, in megabytes.  By default, gives each merge stream 1MB, which
+  should minimize seeks.</td>
+</tr>
+<tr>
+<td><a name="io.sort.record.percent">io.sort.record.percent</a></td><td>0.05</td><td>The percentage of io.sort.mb dedicated to tracking record
+  boundaries. Let this value be r, io.sort.mb be x. The maximum number
+  of records collected before the collection thread must block is equal
+  to (r * x) / 4</td>
+</tr>
+<tr>
+<td><a name="io.sort.spill.percent">io.sort.spill.percent</a></td><td>0.80</td><td>The soft limit in either the buffer or record collection
+  buffers. Once reached, a thread will begin to spill the contents to disk
+  in the background. Note that this does not imply any chunking of data to
+  the spill. A value less than 0.5 is not recommended.</td>
+</tr>
+<tr>
+<td><a name="io.file.buffer.size">io.file.buffer.size</a></td><td>4096</td><td>The size of buffer for use in sequence files.
+  The size of this buffer should probably be a multiple of hardware
+  page size (4096 on Intel x86), and it determines how much data is
+  buffered during read and write operations.</td>
+</tr>
+<tr>
+<td><a name="io.bytes.per.checksum">io.bytes.per.checksum</a></td><td>512</td><td>The number of bytes per checksum.  Must not be larger than
+  io.file.buffer.size.</td>
+</tr>
+<tr>
+<td><a name="io.skip.checksum.errors">io.skip.checksum.errors</a></td><td>false</td><td>If true, when a checksum error is encountered while
+  reading a sequence file, entries are skipped, instead of throwing an
+  exception.</td>
+</tr>
+<tr>
+<td><a name="io.map.index.skip">io.map.index.skip</a></td><td>0</td><td>Number of index entries to skip between each entry.
+  Zero by default. Setting this to values larger than zero can
+  facilitate opening large map files using less memory.</td>
+</tr>
+<tr>
+<td><a name="io.compression.codecs">io.compression.codecs</a></td><td>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</td><td>A list of the compression codec classes that can be used 
+               for compression/decompression.</td>
+</tr>
+<tr>
+<td><a name="io.serializations">io.serializations</a></td><td>org.apache.hadoop.io.serializer.WritableSerialization</td><td>A list of serialization classes that can be used for
+  obtaining serializers and deserializers.</td>
+</tr>
+<tr>
+<td><a name="fs.default.name">fs.default.name</a></td><td>file:///</td><td>The name of the default file system.  A URI whose
+  scheme and authority determine the FileSystem implementation.  The
+  uri's scheme determines the config property (fs.SCHEME.impl) naming
+  the FileSystem implementation class.  The uri's authority is used to
+  determine the host, port, etc. for a filesystem.</td>
+</tr>
+<tr>
+<td><a name="fs.trash.interval">fs.trash.interval</a></td><td>0</td><td>Number of minutes between trash checkpoints.
+  If zero, the trash feature is disabled.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.file.impl">fs.file.impl</a></td><td>org.apache.hadoop.fs.LocalFileSystem</td><td>The FileSystem for file: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.hdfs.impl">fs.hdfs.impl</a></td><td>org.apache.hadoop.hdfs.DistributedFileSystem</td><td>The FileSystem for hdfs: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.s3.impl">fs.s3.impl</a></td><td>org.apache.hadoop.fs.s3.S3FileSystem</td><td>The FileSystem for s3: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.s3n.impl">fs.s3n.impl</a></td><td>org.apache.hadoop.fs.s3native.NativeS3FileSystem</td><td>The FileSystem for s3n: (Native S3) uris.</td>
+</tr>
+<tr>
+<td><a name="fs.kfs.impl">fs.kfs.impl</a></td><td>org.apache.hadoop.fs.kfs.KosmosFileSystem</td><td>The FileSystem for kfs: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.hftp.impl">fs.hftp.impl</a></td><td>org.apache.hadoop.hdfs.HftpFileSystem</td><td></td>
+</tr>
+<tr>
+<td><a name="fs.hsftp.impl">fs.hsftp.impl</a></td><td>org.apache.hadoop.hdfs.HsftpFileSystem</td><td></td>
+</tr>
+<tr>
+<td><a name="fs.ftp.impl">fs.ftp.impl</a></td><td>org.apache.hadoop.fs.ftp.FTPFileSystem</td><td>The FileSystem for ftp: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.ramfs.impl">fs.ramfs.impl</a></td><td>org.apache.hadoop.fs.InMemoryFileSystem</td><td>The FileSystem for ramfs: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.har.impl">fs.har.impl</a></td><td>org.apache.hadoop.fs.HarFileSystem</td><td>The filesystem for Hadoop archives. </td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.dir">fs.checkpoint.dir</a></td><td>${hadoop.tmp.dir}/dfs/namesecondary</td><td>Determines where on the local filesystem the DFS secondary
+      name node should store the temporary images to merge.
+      If this is a comma-delimited list of directories then the image is
+      replicated in all of the directories for redundancy.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.edits.dir">fs.checkpoint.edits.dir</a></td><td>${fs.checkpoint.dir}</td><td>Determines where on the local filesystem the DFS secondary
+      name node should store the temporary edits to merge.
+      If this is a comma-delimited list of directories then the edits are
+      replicated in all of the directories for redundancy.
+      Default value is same as fs.checkpoint.dir
+  </td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.period">fs.checkpoint.period</a></td><td>3600</td><td>The number of seconds between two periodic checkpoints.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.size">fs.checkpoint.size</a></td><td>67108864</td><td>The size of the current edit log (in bytes) that triggers
+       a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.secondary.http.address">dfs.secondary.http.address</a></td><td>0.0.0.0:50090</td><td>
+    The secondary namenode http server address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.address">dfs.datanode.address</a></td><td>0.0.0.0:50010</td><td>
+    The address where the datanode server will listen to.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.http.address">dfs.datanode.http.address</a></td><td>0.0.0.0:50075</td><td>
+    The datanode http server address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.ipc.address">dfs.datanode.ipc.address</a></td><td>0.0.0.0:50020</td><td>
+    The datanode ipc server address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.handler.count">dfs.datanode.handler.count</a></td><td>3</td><td>The number of server threads for the datanode.</td>
+</tr>
+<tr>
+<td><a name="dfs.http.address">dfs.http.address</a></td><td>0.0.0.0:50070</td><td>
+    The address and the base port where the dfs namenode web ui will listen on.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.https.enable">dfs.https.enable</a></td><td>false</td><td>Decide if HTTPS(SSL) is supported on HDFS
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.https.need.client.auth">dfs.https.need.client.auth</a></td><td>false</td><td>Whether SSL client certificate authentication is required
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.https.server.keystore.resource">dfs.https.server.keystore.resource</a></td><td>ssl-server.xml</td><td>Resource file from which ssl server keystore
+  information will be extracted
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.https.client.keystore.resource">dfs.https.client.keystore.resource</a></td><td>ssl-client.xml</td><td>Resource file from which ssl client keystore
+  information will be extracted
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.https.address">dfs.datanode.https.address</a></td><td>0.0.0.0:50475</td><td></td>
+</tr>
+<tr>
+<td><a name="dfs.https.address">dfs.https.address</a></td><td>0.0.0.0:50470</td><td></td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.dns.interface">dfs.datanode.dns.interface</a></td><td>default</td><td>The name of the Network Interface from which a data node should 
+  report its IP address.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.dns.nameserver">dfs.datanode.dns.nameserver</a></td><td>default</td><td>The host name or IP address of the name server (DNS)
+  which a DataNode should use to determine the host name used by the
+  NameNode for communication and display purposes.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication.considerLoad">dfs.replication.considerLoad</a></td><td>true</td><td>Decide if chooseTarget considers the target's load or not
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.default.chunk.view.size">dfs.default.chunk.view.size</a></td><td>32768</td><td>The number of bytes to view for a file on the browser.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.du.reserved">dfs.datanode.du.reserved</a></td><td>0</td><td>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.name.dir">dfs.name.dir</a></td><td>${hadoop.tmp.dir}/dfs/name</td><td>Determines where on the local filesystem the DFS name node
+      should store the name table(fsimage).  If this is a comma-delimited list
+      of directories then the name table is replicated in all of the
+      directories, for redundancy. </td>
+</tr>
+<tr>
+<td><a name="dfs.name.edits.dir">dfs.name.edits.dir</a></td><td>${dfs.name.dir}</td><td>Determines where on the local filesystem the DFS name node
+      should store the transaction (edits) file. If this is a comma-delimited list
+      of directories then the transaction file is replicated in all of the 
+      directories, for redundancy. Default value is same as dfs.name.dir
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.web.ugi">dfs.web.ugi</a></td><td>webuser,webgroup</td><td>The user account used by the web interface.
+    Syntax: USERNAME,GROUP1,GROUP2, ...
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.permissions">dfs.permissions</a></td><td>true</td><td>
+    If "true", enable permission checking in HDFS.
+    If "false", permission checking is turned off,
+    but all other behavior is unchanged.
+    Switching from one parameter value to the other does not change the mode,
+    owner or group of files or directories.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.permissions.supergroup">dfs.permissions.supergroup</a></td><td>supergroup</td><td>The name of the group of super-users.</td>
+</tr>
+<tr>
+<td><a name="dfs.data.dir">dfs.data.dir</a></td><td>${hadoop.tmp.dir}/dfs/data</td><td>Determines where on the local filesystem an DFS data node
+  should store its blocks.  If this is a comma-delimited
+  list of directories, then data will be stored in all named
+  directories, typically on different devices.
+  Directories that do not exist are ignored.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication">dfs.replication</a></td><td>3</td><td>Default block replication. 
+  The actual number of replications can be specified when the file is created.
+  The default is used if replication is not specified in create time.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication.max">dfs.replication.max</a></td><td>512</td><td>Maximal block replication. 
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication.min">dfs.replication.min</a></td><td>1</td><td>Minimal block replication. 
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.block.size">dfs.block.size</a></td><td>67108864</td><td>The default block size for new files.</td>
+</tr>
+<tr>
+<td><a name="dfs.df.interval">dfs.df.interval</a></td><td>60000</td><td>Disk usage statistics refresh interval in msec.</td>
+</tr>
+<tr>
+<td><a name="dfs.client.block.write.retries">dfs.client.block.write.retries</a></td><td>3</td><td>The number of retries for writing blocks to the data nodes, 
+  before we signal failure to the application.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.blockreport.intervalMsec">dfs.blockreport.intervalMsec</a></td><td>3600000</td><td>Determines block reporting interval in milliseconds.</td>
+</tr>
+<tr>
+<td><a name="dfs.blockreport.initialDelay">dfs.blockreport.initialDelay</a></td><td>0</td><td>Delay for first block report in seconds.</td>
+</tr>
+<tr>
+<td><a name="dfs.heartbeat.interval">dfs.heartbeat.interval</a></td><td>3</td><td>Determines datanode heartbeat interval in seconds.</td>
+</tr>
+<tr>
+<td><a name="dfs.namenode.handler.count">dfs.namenode.handler.count</a></td><td>10</td><td>The number of server threads for the namenode.</td>
+</tr>
+<tr>
+<td><a name="dfs.safemode.threshold.pct">dfs.safemode.threshold.pct</a></td><td>0.999f</td><td>
+  	Specifies the percentage of blocks that should satisfy 
+  	the minimal replication requirement defined by dfs.replication.min.
+  	Values less than or equal to 0 mean not to start in safe mode.
+  	Values greater than 1 will make safe mode permanent.
+ 	</td>
+</tr>
+<tr>
+<td><a name="dfs.safemode.extension">dfs.safemode.extension</a></td><td>30000</td><td>
+  	Determines extension of safe mode in milliseconds 
+  	after the threshold level is reached.
+ 	</td>
+</tr>
+<tr>
+<td><a name="dfs.balance.bandwidthPerSec">dfs.balance.bandwidthPerSec</a></td><td>1048576</td><td>
+        Specifies the maximum amount of bandwidth that each datanode
+        can utilize for the balancing purpose in term of
+        the number of bytes per second.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.hosts">dfs.hosts</a></td><td></td><td>Names a file that contains a list of hosts that are
+  permitted to connect to the namenode. The full pathname of the file
+  must be specified.  If the value is empty, all hosts are
+  permitted.</td>
+</tr>
+<tr>
+<td><a name="dfs.hosts.exclude">dfs.hosts.exclude</a></td><td></td><td>Names a file that contains a list of hosts that are
+  not permitted to connect to the namenode.  The full pathname of the
+  file must be specified.  If the value is empty, no hosts are
+  excluded.</td>
+</tr>
+<tr>
+<td><a name="dfs.max.objects">dfs.max.objects</a></td><td>0</td><td>The maximum number of files, directories and blocks
+  dfs supports. A value of zero indicates no limit to the number
+  of objects that dfs supports.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.namenode.decommission.interval">dfs.namenode.decommission.interval</a></td><td>30</td><td>Namenode periodicity in seconds to check if decommission is 
+  complete.</td>
+</tr>
+<tr>
+<td><a name="dfs.namenode.decommission.nodes.per.interval">dfs.namenode.decommission.nodes.per.interval</a></td><td>5</td><td>The number of nodes namenode checks if decommission is complete
+  in each dfs.namenode.decommission.interval.</td>
+</tr>
+<tr>
+<td><a name="dfs.replication.interval">dfs.replication.interval</a></td><td>3</td><td>The periodicity in seconds with which the namenode computes 
+  replication work for datanodes. </td>
+</tr>
+<tr>
+<td><a name="dfs.access.time.precision">dfs.access.time.precision</a></td><td>3600000</td><td>The access time for HDFS file is precise upto this value. 
+               The default value is 1 hour. Setting a value of 0 disables
+               access times for HDFS.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.s3.block.size">fs.s3.block.size</a></td><td>67108864</td><td>Block size to use when writing files to S3.</td>
+</tr>
+<tr>
+<td><a name="fs.s3.buffer.dir">fs.s3.buffer.dir</a></td><td>${hadoop.tmp.dir}/s3</td><td>Determines where on the local filesystem the S3 filesystem
+  should store files before sending them to S3
+  (or after retrieving them from S3).
+  </td>
+</tr>
+<tr>
+<td><a name="fs.s3.maxRetries">fs.s3.maxRetries</a></td><td>4</td><td>The maximum number of retries for reading or writing files to S3, 
+  before we signal failure to the application.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.s3.sleepTimeSeconds">fs.s3.sleepTimeSeconds</a></td><td>10</td><td>The number of seconds to sleep between each S3 retry.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker">mapred.job.tracker</a></td><td>local</td><td>The host and port that the MapReduce job tracker runs
+  at.  If "local", then jobs are run in-process as a single map
+  and reduce task.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.http.address">mapred.job.tracker.http.address</a></td><td>0.0.0.0:50030</td><td>
+    The job tracker http server address and port the server will listen on.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.handler.count">mapred.job.tracker.handler.count</a></td><td>10</td><td>
+    The number of server threads for the JobTracker. This should be roughly
+    4% of the number of tasktracker nodes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.tracker.report.address">mapred.task.tracker.report.address</a></td><td>127.0.0.1:0</td><td>The interface and port that task tracker server listens on. 
+  Since it is only connected to by the tasks, it uses the local interface.
+  EXPERT ONLY. Should only be changed if your host does not have the loopback 
+  interface.</td>
+</tr>
+<tr>
+<td><a name="mapred.local.dir">mapred.local.dir</a></td><td>${hadoop.tmp.dir}/mapred/local</td><td>The local directory where MapReduce stores intermediate
+  data files.  May be a comma-separated list of
+  directories on different devices in order to spread disk i/o.
+  Directories that do not exist are ignored.
+  </td>
+</tr>
+<tr>
+<td><a name="local.cache.size">local.cache.size</a></td><td>10737418240</td><td>The limit on the size of cache you want to keep, set by default
+  to 10GB. This will act as a soft limit on the cache directory for out of band data.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.system.dir">mapred.system.dir</a></td><td>${hadoop.tmp.dir}/mapred/system</td><td>The shared directory where MapReduce stores control files.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.temp.dir">mapred.temp.dir</a></td><td>${hadoop.tmp.dir}/mapred/temp</td><td>A shared directory for temporary files.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.local.dir.minspacestart">mapred.local.dir.minspacestart</a></td><td>0</td><td>If the space in mapred.local.dir drops under this, 
+  do not ask for more tasks.
+  Value in bytes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.local.dir.minspacekill">mapred.local.dir.minspacekill</a></td><td>0</td><td>If the space in mapred.local.dir drops under this, 
+  	do not ask more tasks until all the current ones have finished and 
+  	cleaned up. Also, to save the rest of the tasks we have running, 
+  	kill one of them, to clean up some space. Start with the reduce tasks,
+  	then go with the ones that have finished the least.
+  	Value in bytes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.expiry.interval">mapred.tasktracker.expiry.interval</a></td><td>600000</td><td>Expert: The time-interval, in milliseconds, after which
+  a tasktracker is declared 'lost' if it doesn't send heartbeats.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.instrumentation">mapred.tasktracker.instrumentation</a></td><td>org.apache.hadoop.mapred.TaskTrackerMetricsInst</td><td>Expert: The instrumentation class to associate with each TaskTracker.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.vmem.reserved">mapred.tasktracker.vmem.reserved</a></td><td>-1</td><td>Configuration property to specify the amount of virtual memory
+    that has to be reserved by the TaskTracker for system usage (OS, TT etc).
+    The reserved virtual memory should be a part of the total virtual memory
+    available on the TaskTracker.
+    
+    The reserved virtual memory and the total virtual memory values are
+    reported by the TaskTracker as part of heart-beat so that they can
+    be considered by a scheduler. Please refer to the documentation of the
+    configured scheduler to see how this property is used.
+    
+    These two values are also used by a TaskTracker for tracking tasks' memory
+    usage. Memory management functionality on a TaskTracker is disabled if this
+    property is set to -1, if it is more than the total virtual memory on the 
+    tasktracker, or if either of the values is negative.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.pmem.reserved">mapred.tasktracker.pmem.reserved</a></td><td>-1</td><td>Configuration property to specify the amount of physical memory
+    that has to be reserved by the TaskTracker for system usage (OS, TT etc).
+    The reserved physical memory should be a part of the total physical memory
+    available on the TaskTracker.
+
+    The reserved physical memory and the total physical memory values are
+    reported by the TaskTracker as part of heart-beat so that they can
+    be considered by a scheduler. Please refer to the documentation of the
+    configured scheduler to see how this property is used.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.default.maxvmem">mapred.task.default.maxvmem</a></td><td>-1</td><td>
+    Cluster-wide configuration in bytes to be set by the administrators that
+    provides default amount of maximum virtual memory for job's tasks. This has
+    to be set on both the JobTracker node for the sake of scheduling decisions
+    and on the TaskTracker nodes for the sake of memory management.
+
+    If a job doesn't specify its virtual memory requirement by setting
+    mapred.task.maxvmem to -1, tasks are assured a memory limit set
+    to this property. This property is set to -1 by default.
+
+    This value should in general be less than the cluster-wide
+    configuration mapred.task.limit.maxvmem. If not or if it is not set,
+    TaskTracker's memory management will be disabled and a scheduler's memory
+    based scheduling decisions may be affected. Please refer to the
+    documentation of the configured scheduler to see how this property is used.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.limit.maxvmem">mapred.task.limit.maxvmem</a></td><td>-1</td><td>
+    Cluster-wide configuration in bytes to be set by the site administrators
+    that provides an upper limit on the maximum virtual memory that can be
+    specified by a job via mapred.task.maxvmem. This has to be set on both the
+    JobTracker node for the sake of scheduling decisions and on the TaskTracker
+    nodes for the sake of memory management.
+    
+    The job configuration mapred.task.maxvmem should not be more than this
+    value, otherwise depending on the scheduler being configured, the job may
+    be rejected or the job configuration may just be ignored. Please refer to
+    the documentation of the configured scheduler to see how this property is
+    used.
+
+    If it is not set on a TaskTracker, the TaskTracker's memory management
+    will be disabled.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.maxvmem">mapred.task.maxvmem</a></td><td>-1</td><td>
+    The maximum amount of virtual memory any task of a job will use, in bytes.
+
+    This value will be used by TaskTrackers for monitoring the memory usage of
+    tasks of this job. If a TaskTracker's memory management functionality is
+    enabled, each task of this job will be allowed to use a maximum virtual
+    memory specified by this property. If the task's memory usage goes over 
+    this value, the task will be failed by the TT. If not set, the
+    cluster-wide configuration mapred.task.default.maxvmem is used as the
+    default value for memory requirements. If this property, cascaded with
+    mapred.task.default.maxvmem, still resolves to -1, the job's tasks will
+    not be assured any particular amount of virtual memory and may be killed by
+    a TT that intends to control the total memory usage of the tasks via memory
+    management functionality. If the memory management functionality is
+    disabled on a TT, this value is ignored.
+
+    This value should not be more than the cluster-wide configuration
+    mapred.task.limit.maxvmem.
+
+    This value may be used by schedulers that support scheduling based on a job's
+    memory requirements. Please refer to the documentation of the scheduler
+    being configured to see if it does memory based scheduling and if it does,
+    how this property is used by that scheduler.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.maxpmem">mapred.task.maxpmem</a></td><td>-1</td><td>
+   The maximum amount of physical memory any task of a job will use in bytes.
+
+   This value may be used by schedulers that support scheduling based on a job's
+   memory requirements. In general, a task of this job will be scheduled on a
+   TaskTracker, only if the amount of physical memory still unoccupied on the
+   TaskTracker is greater than or equal to this value. Different schedulers may
+   make different decisions; some might simply ignore this value. Please refer to
+   the documentation of the scheduler being configured to see if it does
+   memory based scheduling and if it does, how this variable is used by that
+   scheduler.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.memory_calculator_plugin">mapred.tasktracker.memory_calculator_plugin</a></td><td></td><td>
+   Name of the class whose instance will be used to query memory information
+   on the tasktracker.
+   
+   The class must be an instance of 
+   org.apache.hadoop.util.MemoryCalculatorPlugin. If the value is null, the
+   tasktracker attempts to use a class appropriate to the platform. 
+   Currently, the only platform supported is Linux.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.taskmemorymanager.monitoring-interval">mapred.tasktracker.taskmemorymanager.monitoring-interval</a></td><td>5000</td><td>The interval, in milliseconds, for which the tasktracker waits
+   between two cycles of monitoring its tasks' memory usage. Used only if
+   tasks' memory management is enabled via mapred.tasktracker.tasks.maxmemory.
+   </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill">mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</a></td><td>5000</td><td>The time, in milliseconds, the tasktracker waits for sending a
+  SIGKILL to a process that has overrun memory limits, after it has been sent
+  a SIGTERM. Used only if tasks' memory management is enabled via
+  mapred.tasktracker.tasks.maxmemory.</td>
+</tr>
+<tr>
+<td><a name="mapred.map.tasks">mapred.map.tasks</a></td><td>2</td><td>The default number of map tasks per job.  Typically set
+  to a prime several times greater than the number of available hosts.
+  Ignored when mapred.job.tracker is "local".  
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.tasks">mapred.reduce.tasks</a></td><td>1</td><td>The default number of reduce tasks per job.  Typically set
+  to a prime close to the number of available hosts.  Ignored when
+  mapred.job.tracker is "local".
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.restart.recover">mapred.jobtracker.restart.recover</a></td><td>false</td><td>"true" to enable (job) recovery upon restart,
+               "false" to start afresh
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.job.history.block.size">mapred.jobtracker.job.history.block.size</a></td><td>3145728</td><td>The block size of the job history file. Since the job recovery
+               uses job history, it's important to dump job history to disk as 
+               soon as possible. Note that this is an expert level parameter.
+               The default value is set to 3 MB.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.taskScheduler">mapred.jobtracker.taskScheduler</a></td><td>org.apache.hadoop.mapred.JobQueueTaskScheduler</td><td>The class responsible for scheduling the tasks.</td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.taskScheduler.maxRunningTasksPerJob">mapred.jobtracker.taskScheduler.maxRunningTasksPerJob</a></td><td></td><td>The maximum number of running tasks for a job before
+  it gets preempted. No limits if undefined.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.max.attempts">mapred.map.max.attempts</a></td><td>4</td><td>Expert: The maximum number of attempts per map task.
+  In other words, the framework will try to execute a map task this many
+  times before giving up on it.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.max.attempts">mapred.reduce.max.attempts</a></td><td>4</td><td>Expert: The maximum number of attempts per reduce task.
+  In other words, the framework will try to execute a reduce task this many
+  times before giving up on it.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.parallel.copies">mapred.reduce.parallel.copies</a></td><td>5</td><td>The default number of parallel transfers run by reduce
+  during the copy (shuffle) phase.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.copy.backoff">mapred.reduce.copy.backoff</a></td><td>300</td><td>The maximum amount of time (in seconds) a reducer spends on 
+  fetching one map output before declaring it as failed.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.timeout">mapred.task.timeout</a></td><td>600000</td><td>The number of milliseconds before a task will be
+  terminated if it neither reads an input, writes an output, nor
+  updates its status string.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.map.tasks.maximum">mapred.tasktracker.map.tasks.maximum</a></td><td>2</td><td>The maximum number of map tasks that will be run
+  simultaneously by a task tracker.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.reduce.tasks.maximum">mapred.tasktracker.reduce.tasks.maximum</a></td><td>2</td><td>The maximum number of reduce tasks that will be run
+  simultaneously by a task tracker.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.completeuserjobs.maximum">mapred.jobtracker.completeuserjobs.maximum</a></td><td>100</td><td>The maximum number of complete jobs per user to keep around 
+  before delegating them to the job history.</td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.instrumentation">mapred.jobtracker.instrumentation</a></td><td>org.apache.hadoop.mapred.JobTrackerMetricsInst</td><td>Expert: The instrumentation class to associate with each JobTracker.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.child.java.opts">mapred.child.java.opts</a></td><td>-Xmx200m</td><td>Java opts for the task tracker child processes.  
+  The following symbol, if present, will be interpolated: @taskid@ is replaced 
+  by the current TaskID. Any other occurrences of '@' will go unchanged.
+  For example, to enable verbose gc logging to a file named for the taskid in
+  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+  
+  The configuration variable mapred.child.ulimit can be used to control the
+  maximum virtual memory of the child processes. 
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.child.ulimit">mapred.child.ulimit</a></td><td></td><td>The maximum virtual memory, in KB, of a process launched by the 
+  Map-Reduce framework. This can be used to control both the Mapper/Reducer 
+  tasks and applications using Hadoop Pipes, Hadoop Streaming etc. 
+  By default it is left unspecified to let cluster admins control it via 
+  limits.conf and other such relevant mechanisms.
+  
+  Note: mapred.child.ulimit must be greater than or equal to the -Xmx passed to
+  the JVM, or the VM might not start. 
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.child.tmp">mapred.child.tmp</a></td><td>./tmp</td><td> To set the value of tmp directory for map and reduce tasks.
+  If the value is an absolute path, it is directly assigned. Otherwise, it is
+  prepended with task's working directory. The java tasks are executed with
+  option -Djava.io.tmpdir='the absolute path of the tmp dir'. Pipes and
+  streaming are set with environment variable,
+   TMPDIR='the absolute path of the tmp dir'
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.inmem.merge.threshold">mapred.inmem.merge.threshold</a></td><td>1000</td><td>The threshold, in terms of the number of files 
+  for the in-memory merge process. When we accumulate threshold number of files
+  we initiate the in-memory merge and spill to disk. A value of 0 or less than
+  0 indicates we want to DON'T have any threshold and instead depend only on
+  the ramfs's memory consumption to trigger the merge.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.shuffle.merge.percent">mapred.job.shuffle.merge.percent</a></td><td>0.66</td><td>The usage threshold at which an in-memory merge will be
+  initiated, expressed as a percentage of the total memory allocated to
+  storing in-memory map outputs, as defined by
+  mapred.job.shuffle.input.buffer.percent.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.shuffle.input.buffer.percent">mapred.job.shuffle.input.buffer.percent</a></td><td>0.70</td><td>The percentage of memory to be allocated from the maximum heap
+  size to storing map outputs during the shuffle.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.reduce.input.buffer.percent">mapred.job.reduce.input.buffer.percent</a></td><td>0.0</td><td>The percentage of memory- relative to the maximum heap size- to
+  retain map outputs during the reduce. When the shuffle is concluded, any
+  remaining map outputs in memory must consume less than this threshold before
+  the reduce can begin.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.tasks.speculative.execution">mapred.map.tasks.speculative.execution</a></td><td>true</td><td>If true, then multiple instances of some map tasks 
+               may be executed in parallel.</td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.tasks.speculative.execution">mapred.reduce.tasks.speculative.execution</a></td><td>true</td><td>If true, then multiple instances of some reduce tasks 
+               may be executed in parallel.</td>
+</tr>
+<tr>
+<td><a name="mapred.job.reuse.jvm.num.tasks">mapred.job.reuse.jvm.num.tasks</a></td><td>1</td><td>How many tasks to run per jvm. If set to -1, there is
+  no limit. 
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.min.split.size">mapred.min.split.size</a></td><td>0</td><td>The minimum size chunk that map input should be split
+  into.  Note that some file formats may have minimum split sizes that
+  take priority over this setting.</td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.maxtasks.per.job">mapred.jobtracker.maxtasks.per.job</a></td><td>-1</td><td>The maximum number of tasks for a single job.
+  A value of -1 indicates that there is no maximum.  </td>
+</tr>
+<tr>
+<td><a name="mapred.submit.replication">mapred.submit.replication</a></td><td>10</td><td>The replication level for submitted job files.  This
+  should be around the square root of the number of nodes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.dns.interface">mapred.tasktracker.dns.interface</a></td><td>default</td><td>The name of the Network Interface from which a task
+  tracker should report its IP address.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.dns.nameserver">mapred.tasktracker.dns.nameserver</a></td><td>default</td><td>The host name or IP address of the name server (DNS)
+  which a TaskTracker should use to determine the host name used by
+  the JobTracker for communication and display purposes.
+  </td>
+</tr>
+<tr>
+<td><a name="tasktracker.http.threads">tasktracker.http.threads</a></td><td>40</td><td>The number of worker threads that for the http server. This is
+               used for map output fetching
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.tracker.http.address">mapred.task.tracker.http.address</a></td><td>0.0.0.0:50060</td><td>
+    The task tracker http server address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="keep.failed.task.files">keep.failed.task.files</a></td><td>false</td><td>Should the files for failed tasks be kept. This should only be 
+               used on jobs that are failing, because the storage is never
+               reclaimed. It also prevents the map outputs from being erased
+               from the reduce directory as they are consumed.</td>
+</tr>
+<tr>
+<td><a name="mapred.output.compress">mapred.output.compress</a></td><td>false</td><td>Should the job outputs be compressed?
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.output.compression.type">mapred.output.compression.type</a></td><td>RECORD</td><td>If the job outputs are to compressed as SequenceFiles, how should
+               they be compressed? Should be one of NONE, RECORD or BLOCK.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.output.compression.codec">mapred.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If the job outputs are compressed, how should they be compressed?
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.compress.map.output">mapred.compress.map.output</a></td><td>false</td><td>Should the outputs of the maps be compressed before being
+               sent across the network? Uses SequenceFile compression.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.output.compression.codec">mapred.map.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If the map outputs are compressed, how should they be 
+               compressed?
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.compress.blocksize">io.seqfile.compress.blocksize</a></td><td>1000000</td><td>The minimum block size for compression in block compressed 
+  				SequenceFiles.
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.lazydecompress">io.seqfile.lazydecompress</a></td><td>true</td><td>Should values of block-compressed SequenceFiles be decompressed
+  				only when necessary?
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.sorter.recordlimit">io.seqfile.sorter.recordlimit</a></td><td>1000000</td><td>The limit on number of records to be kept in memory in a spill 
+  				in SequenceFiles.Sorter
+  </td>
+</tr>
+<tr>
+<td><a name="map.sort.class">map.sort.class</a></td><td>org.apache.hadoop.util.QuickSort</td><td>The default sort class for sorting keys.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.userlog.limit.kb">mapred.userlog.limit.kb</a></td><td>0</td><td>The maximum size of user-logs of each task in KB. 0 disables the cap.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.userlog.retain.hours">mapred.userlog.retain.hours</a></td><td>24</td><td>The maximum time, in hours, for which the user-logs are to be 
+  				retained.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.hosts">mapred.hosts</a></td><td></td><td>Names a file that contains the list of nodes that may
+  connect to the jobtracker.  If the value is empty, all hosts are
+  permitted.</td>
+</tr>
+<tr>
+<td><a name="mapred.hosts.exclude">mapred.hosts.exclude</a></td><td></td><td>Names a file that contains the list of hosts that
+  should be excluded by the jobtracker.  If the value is empty, no
+  hosts are excluded.</td>
+</tr>
+<tr>
+<td><a name="mapred.max.tracker.blacklists">mapred.max.tracker.blacklists</a></td><td>4</td><td>The number of blacklists for a taskTracker by various jobs 
+               after which the task tracker could be blacklisted across
+               all jobs. The tracker will be given a tasks later 
+               (after a day). The tracker will become a healthy 
+               tracker after a restart. 
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.max.tracker.failures">mapred.max.tracker.failures</a></td><td>4</td><td>The number of task-failures on a tasktracker of a given job 
+               after which new tasks of that job aren't assigned to it.
+  </td>
+</tr>
+<tr>
+<td><a name="jobclient.output.filter">jobclient.output.filter</a></td><td>FAILED</td><td>The filter for controlling the output of the task's userlogs sent
+               to the console of the JobClient. 
+               The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and 
+               ALL.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.persist.jobstatus.active">mapred.job.tracker.persist.jobstatus.active</a></td><td>false</td><td>Indicates if persistency of job status information is
+      active or not.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.persist.jobstatus.hours">mapred.job.tracker.persist.jobstatus.hours</a></td><td>0</td><td>The number of hours job status information is persisted in DFS.
+    The job status information will be available after it drops out of the memory
+    queue and between jobtracker restarts. With a zero value the job status
+    information is not persisted at all in DFS.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.persist.jobstatus.dir">mapred.job.tracker.persist.jobstatus.dir</a></td><td>/jobtracker/jobsInfo</td><td>The directory where the job status information is persisted
+      in a file system to be available after it drops out of the memory queue and
+      between jobtracker restarts.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.task.profile">mapred.task.profile</a></td><td>false</td><td>To set whether the system should collect profiler
+     information for some of the tasks in this job? The information is stored
+     in the user log directory. The value is "true" if task profiling
+     is enabled.</td>
+</tr>
+<tr>
+<td><a name="mapred.task.profile.maps">mapred.task.profile.maps</a></td><td>0-2</td><td> To set the ranges of map tasks to profile.
+    mapred.task.profile has to be set to true for this value to take effect.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.task.profile.reduces">mapred.task.profile.reduces</a></td><td>0-2</td><td> To set the ranges of reduce tasks to profile.
+    mapred.task.profile has to be set to true for this value to take effect.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.line.input.format.linespermap">mapred.line.input.format.linespermap</a></td><td>1</td><td> Number of lines per split in NLineInputFormat.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.skip.attempts.to.start.skipping">mapred.skip.attempts.to.start.skipping</a></td><td>2</td><td> The number of Task attempts AFTER which skip mode 
+    will be kicked off. When skip mode is kicked off, the 
+    tasks reports the range of records which it will process 
+    next, to the TaskTracker. So that on failures, TT knows which 
+    ones are possibly the bad records. On further executions, 
+    those are skipped.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.skip.map.auto.incr.proc.count">mapred.skip.map.auto.incr.proc.count</a></td><td>true</td><td> The flag which if set to true, 
+    SkipBadRecords.COUNTER_MAP_PROCESSED_RECORDS is incremented 
+    by MapRunner after invoking the map function. This value must be set to 
+    false for applications which process the records asynchronously 
+    or buffer the input records. For example streaming. 
+    In such cases applications should increment this counter on their own.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.skip.reduce.auto.incr.proc.count">mapred.skip.reduce.auto.incr.proc.count</a></td><td>true</td><td> The flag which if set to true, 
+    SkipBadRecords.COUNTER_REDUCE_PROCESSED_GROUPS is incremented 
+    by framework after invoking the reduce function. This value must be set to 
+    false for applications which process the records asynchronously 
+    or buffer the input records. For example streaming. 
+    In such cases applications should increment this counter on their own.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.skip.out.dir">mapred.skip.out.dir</a></td><td></td><td> If no value is specified here, the skipped records are 
+    written to the output directory at _logs/skip.
+    Users can disable writing skipped records by setting the value to "none". 
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.skip.map.max.skip.records">mapred.skip.map.max.skip.records</a></td><td>0</td><td> The number of acceptable skip records surrounding the bad 
+    record PER bad record in the mapper. The number includes the bad record as well.
+    To turn off the detection/skipping of bad records, set the 
+    value to 0.
+    The framework tries to narrow down the skipped range by retrying  
+    until this threshold is met OR all attempts get exhausted for this task. 
+    Set the value to Long.MAX_VALUE to indicate that the framework need not try to 
+    narrow down. Whatever records (depending on the application) get skipped are 
+    acceptable.
+    </td>
+</tr>
+<tr>
+<td><a name="mapred.skip.reduce.max.skip.groups">mapred.skip.reduce.max.skip.groups</a></td><td>0</td><td> The number of acceptable skip groups surrounding the bad 
+    group PER bad group in the reducer. The number includes the bad group as well.
+    To turn off the detection/skipping of bad groups, set the 
+    value to 0.
+    The framework tries to narrow down the skipped range by retrying  
+    until this threshold is met OR all attempts get exhausted for this task. 
+    Set the value to Long.MAX_VALUE to indicate that the framework need not try to 
+    narrow down. Whatever groups (depending on the application) get skipped are 
+    acceptable.
+    </td>
+</tr>
+<tr>
+<td><a name="ipc.client.idlethreshold">ipc.client.idlethreshold</a></td><td>4000</td><td>Defines the threshold number of connections after which
+               connections will be inspected for idleness.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.kill.max">ipc.client.kill.max</a></td><td>10</td><td>Defines the maximum number of clients to disconnect in one go.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.connection.maxidletime">ipc.client.connection.maxidletime</a></td><td>10000</td><td>The maximum time in msec after which a client will bring down the
+               connection to the server.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.connect.max.retries">ipc.client.connect.max.retries</a></td><td>10</td><td>Indicates the number of retries a client will make to establish
+               a server connection.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.server.listen.queue.size">ipc.server.listen.queue.size</a></td><td>128</td><td>Indicates the length of the listen queue for servers accepting
+               client connections.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.server.tcpnodelay">ipc.server.tcpnodelay</a></td><td>false</td><td>Turn on/off Nagle's algorithm for the TCP socket connection on 
+  the server. Setting to true disables the algorithm and may decrease latency
+  at the cost of more/smaller packets. 
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.tcpnodelay">ipc.client.tcpnodelay</a></td><td>false</td><td>Turn on/off Nagle's algorithm for the TCP socket connection on 
+  the client. Setting to true disables the algorithm and may decrease latency
+  at the cost of more/smaller packets. 
+  </td>
+</tr>
+<tr>
+<td><a name="job.end.retry.attempts">job.end.retry.attempts</a></td><td>0</td><td>Indicates how many times hadoop should attempt to contact the
+               notification URL </td>
+</tr>
+<tr>
+<td><a name="job.end.retry.interval">job.end.retry.interval</a></td><td>30000</td><td>Indicates time in milliseconds between notification URL retry
+                calls</td>
+</tr>
+<tr>
+<td><a name="webinterface.private.actions">webinterface.private.actions</a></td><td>false</td><td> If set to true, the web interfaces of JT and NN may contain 
+                actions, such as kill job, delete file, etc., that should 
+                not be exposed to the public. Enable this option if the interfaces 
+                are only reachable by those who have the right authorization.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.rpc.socket.factory.class.default">hadoop.rpc.socket.factory.class.default</a></td><td>org.apache.hadoop.net.StandardSocketFactory</td><td> Default SocketFactory to use. This parameter is expected to be
+    formatted as "package.FactoryClassName".
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.rpc.socket.factory.class.ClientProtocol">hadoop.rpc.socket.factory.class.ClientProtocol</a></td><td></td><td> SocketFactory to use to connect to a DFS. If null or empty, use
+    hadoop.rpc.socket.factory.class.default. This socket factory is also used by
+    DFSClient to create sockets to DataNodes.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.rpc.socket.factory.class.JobSubmissionProtocol">hadoop.rpc.socket.factory.class.JobSubmissionProtocol</a></td><td></td><td> SocketFactory to use to connect to a Map/Reduce master
+    (JobTracker). If null or empty, then use hadoop.rpc.socket.factory.class.default.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.socks.server">hadoop.socks.server</a></td><td></td><td> Address (host:port) of the SOCKS server to be used by the
+    SocksSocketFactory.
+  </td>
+</tr>
+<tr>
+<td><a name="topology.node.switch.mapping.impl">topology.node.switch.mapping.impl</a></td><td>org.apache.hadoop.net.ScriptBasedMapping</td><td> The default implementation of the DNSToSwitchMapping. It
+    invokes a script specified in topology.script.file.name to resolve
+    node names. If the value for topology.script.file.name is not set, the
+    default value of DEFAULT_RACK is returned for all node names.
+  </td>
+</tr>
+<tr>
+<td><a name="topology.script.file.name">topology.script.file.name</a></td><td></td><td> The script name that should be invoked to resolve DNS names to
+    NetworkTopology names. Example: the script would take host.foo.bar as an
+    argument, and return /rack1 as the output.
+  </td>
+</tr>
+<tr>
+<td><a name="topology.script.number.args">topology.script.number.args</a></td><td>100</td><td> The max number of args that the script configured with 
+    topology.script.file.name should be run with. Each arg is an
+    IP address.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.cache.levels">mapred.task.cache.levels</a></td><td>2</td><td> This is the max level of the task cache. For example, if
+    the level is 2, the tasks cached are at the host level and at the rack
+    level.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.queue.names">mapred.queue.names</a></td><td>default</td><td> Comma separated list of queues configured for this jobtracker.
+    Jobs are added to queues and schedulers can configure different 
+    scheduling properties for the various queues. To configure a property 
+    for a queue, the name of the queue must match the name specified in this 
+    value. Queue properties that are common to all schedulers are configured 
+    here with the naming convention, mapred.queue.$QUEUE-NAME.$PROPERTY-NAME,
+    for example, mapred.queue.default.acl-submit-job.
+    The number of queues configured in this parameter could depend on the
+    type of scheduler being used, as specified in 
+    mapred.jobtracker.taskScheduler. For example, the JobQueueTaskScheduler
+    supports only a single queue, which is the default configured here.
+    Before adding more queues, ensure that the scheduler you've configured
+    supports multiple queues.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.acls.enabled">mapred.acls.enabled</a></td><td>false</td><td> Specifies whether ACLs are enabled, and should be checked
+    for various operations.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.queue.default.acl-submit-job">mapred.queue.default.acl-submit-job</a></td><td>*</td><td> Comma separated list of user and group names that are allowed
+    to submit jobs to the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. alice,bob group1,group2. 
+    If set to the special value '*', it means all users are allowed to 
+    submit jobs. 
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.queue.default.acl-administer-jobs">mapred.queue.default.acl-administer-jobs</a></td><td>*</td><td> Comma separated list of user and group names that are allowed
+    to delete jobs or modify job's priority for jobs not owned by the current
+    user in the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. alice,bob group1,group2. 
+    If set to the special value '*', it means all users are allowed to do 
+    this operation.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.queue.name">mapred.job.queue.name</a></td><td>default</td><td> Queue to which a job is submitted. This must match one of the
+    queues defined in mapred.queue.names for the system. Also, the ACL setup
+    for the queue must allow the current user to submit a job to the queue.
+    Before specifying a queue, ensure that the system is configured with 
+    the queue, and access is allowed for submitting jobs to the queue.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.indexcache.mb">mapred.tasktracker.indexcache.mb</a></td><td>10</td><td> The maximum memory that a task tracker allows for the 
+    index cache that is used when serving map outputs to reducers.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.merge.recordsBeforeProgress">mapred.merge.recordsBeforeProgress</a></td><td>10000</td><td> The number of records to process during merge before
+   sending a progress notification to the TaskTracker.
+  </td>
+</tr>
+</table>
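+<p>
+As a hedged illustration only: per-job values for several of the properties above are
+usually set through the JobConf API (or in hadoop-site.xml for cluster-wide overrides).
+The job class and the values below are hypothetical, not recommendations.
+</p>
+<pre class="code">
+import org.apache.hadoop.mapred.JobConf;
+
+public class TuningExample {
+  public static JobConf configure() {
+    JobConf job = new JobConf(TuningExample.class);
+    job.setNumMapTasks(20);        // mapred.map.tasks: a hint, not a hard limit
+    job.setNumReduceTasks(7);      // mapred.reduce.tasks
+    job.set("mapred.child.java.opts", "-Xmx512m");    // heap for task JVMs
+    job.setBoolean("mapred.map.tasks.speculative.execution", false);
+    job.set("mapred.job.queue.name", "default");      // must appear in mapred.queue.names
+    return job;
+  }
+}
+</pre>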
+</body>
+</html>

+ 302 - 0
docs/cn/hadoop_archives.html

@@ -0,0 +1,302 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop Archives</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Hadoop Archives</div>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hadoop_archives.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop Archives</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E4%BB%80%E4%B9%88%E6%98%AFHadoop+archives%3F"> 什么是Hadoop archives? </a>
+</li>
+<li>
+<a href="#%E5%A6%82%E4%BD%95%E5%88%9B%E5%BB%BAarchive%3F"> 如何创建archive? </a>
+</li>
+<li>
+<a href="#%E5%A6%82%E4%BD%95%E6%9F%A5%E7%9C%8Barchives%E4%B8%AD%E7%9A%84%E6%96%87%E4%BB%B6%3F"> 如何查看archives中的文件? </a>
+</li>
+</ul>
+</div>
+        
+<a name="N1000D"></a><a name="%E4%BB%80%E4%B9%88%E6%98%AFHadoop+archives%3F"></a>
+<h2 class="h3"> 什么是Hadoop archives? </h2>
+<div class="section">
+<p>
+        Hadoop archives是特殊的档案格式。一个Hadoop archive对应一个文件系统目录。
+        Hadoop archive的扩展名是*.har。Hadoop archive包含元数据(形式是_index和_masterindx)和数据(part-*)文件。_index文件包含了档案中的文件的文件名和位置信息。
+        </p>
+</div>
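+<p>
+        For illustration only, listing a freshly created archive directory might show
+        something like the following; the number of part files depends on how much
+        data was archived.
+        </p>
+<pre class="code">
+foo.har/_index
+foo.har/_masterindx
+foo.har/part-0
+</pre>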
+        
+<a name="N10017"></a><a name="%E5%A6%82%E4%BD%95%E5%88%9B%E5%BB%BAarchive%3F"></a>
+<h2 class="h3"> 如何创建archive? </h2>
+<div class="section">
+<p>
+        
+<span class="codefrag">用法: hadoop archive -archiveName name &lt;src&gt;* &lt;dest&gt;</span>
+        
+</p>
+<p>
+        由-archiveName选项指定你要创建的archive的名字。比如foo.har。archive的名字的扩展名应该是*.har。输入是文件系统的路径名,路径名的格式和平时的表达方式一样。创建的archive会保存到目标目录下。注意创建archives是一个Map/Reduce job。你应该在map reduce集群上运行这个命令。下面是一个例子:
+        </p>
+<p>
+        
+<span class="codefrag">hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/hadoop/dir2 /user/zoo/</span>
+        
+</p>
+<p>
+        In the above example,
+        /user/hadoop/dir1 and /user/hadoop/dir2 are archived into the file system directory
+        /user/zoo/foo.har. The source files are not changed or removed when an archive is created.
+        </p>
+</div>
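+<p>
+        Archives can also be created from Java code. The sketch below assumes that the
+        org.apache.hadoop.tools.HadoopArchives tool class backing the shell command is
+        available on the classpath; the paths are the ones from the example above.
+        </p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.HadoopArchives;
+import org.apache.hadoop.util.ToolRunner;
+
+public class CreateHar {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    // Equivalent to: hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/zoo/
+    String[] harArgs = {"-archiveName", "foo.har", "/user/hadoop/dir1", "/user/zoo/"};
+    int ret = ToolRunner.run(conf, new HadoopArchives(conf), harArgs);
+    System.exit(ret);
+  }
+}
+</pre>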
+        
+<a name="N1002F"></a><a name="%E5%A6%82%E4%BD%95%E6%9F%A5%E7%9C%8Barchives%E4%B8%AD%E7%9A%84%E6%96%87%E4%BB%B6%3F"></a>
+<h2 class="h3"> 如何查看archives中的文件? </h2>
+<div class="section">
+<p>
+        archive作为文件系统层暴露给外界。所以所有的fs shell命令都能在archive上运行,但是要使用不同的URI。
+        另外,archive是不可改变的。所以重命名,删除和创建都会返回错误。Hadoop Archives 的URI是
+        </p>
+<p>
+<span class="codefrag">har://scheme-hostname:port/archivepath/fileinarchive</span>
+</p>
+<p>
+        If no scheme-hostname is provided, the default file system is assumed. In that case the URI looks like
+        </p>
+<p>
+<span class="codefrag">
+        har:///archivepath/fileinarchive</span>
+</p>
+<p>
+        Here is an example of an archive. The input to the archive is /dir, a directory that contains the files filea and fileb.
+        The command to archive /dir into /user/hadoop/foo.har is
+        </p>
+<p>
+<span class="codefrag">hadoop archive -archiveName foo.har /dir /user/hadoop</span>
+        
+</p>
+<p>
+        To get the list of files in the created archive, use
+        </p>
+<p>
+<span class="codefrag">hadoop dfs -lsr har:///user/hadoop/foo.har</span>
+</p>
+<p>To view the file filea inside the archive, use
+        </p>
+<p>
+<span class="codefrag">hadoop dfs -cat har:///user/hadoop/foo.har/dir/filea</span>
+</p>
+</div>
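+<p>
+        The same lookups work from the Java FileSystem API, since a har: path resolves
+        to the archive-aware file system. A minimal sketch, using the paths from the
+        example above:
+        </p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+
+public class CatFromHar {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    Path file = new Path("har:///user/hadoop/foo.har/dir/filea");
+    FileSystem fs = file.getFileSystem(conf);      // resolves the har scheme
+    FSDataInputStream in = fs.open(file);
+    IOUtils.copyBytes(in, System.out, conf, true); // print filea, then close the stream
+  }
+}
+</pre>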
+	
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were omitted because the diff is too large
+ 137 - 0
docs/cn/hadoop_archives.pdf


+ 664 - 0
docs/cn/hdfs_design.html

@@ -0,0 +1,664 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> 
+      The Hadoop Distributed File System: Architecture and Design
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">HDFS构架设计</div>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hdfs_design.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> 
+      The Hadoop Distributed File System: Architecture and Design
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E5%BC%95%E8%A8%80"> 引言 </a>
+</li>
+<li>
+<a href="#%E5%89%8D%E6%8F%90%E5%92%8C%E8%AE%BE%E8%AE%A1%E7%9B%AE%E6%A0%87"> 前提和设计目标 </a>
+<ul class="minitoc">
+<li>
+<a href="#%E7%A1%AC%E4%BB%B6%E9%94%99%E8%AF%AF"> 硬件错误 </a>
+</li>
+<li>
+<a href="#%E6%B5%81%E5%BC%8F%E6%95%B0%E6%8D%AE%E8%AE%BF%E9%97%AE"> 流式数据访问 </a>
+</li>
+<li>
+<a href="#%E5%A4%A7%E8%A7%84%E6%A8%A1%E6%95%B0%E6%8D%AE%E9%9B%86"> 大规模数据集 </a>
+</li>
+<li>
+<a href="#%E7%AE%80%E5%8D%95%E7%9A%84%E4%B8%80%E8%87%B4%E6%80%A7%E6%A8%A1%E5%9E%8B"> 简单的一致性模型 </a>
+</li>
+<li>
+<a href="#%E2%80%9C%E7%A7%BB%E5%8A%A8%E8%AE%A1%E7%AE%97%E6%AF%94%E7%A7%BB%E5%8A%A8%E6%95%B0%E6%8D%AE%E6%9B%B4%E5%88%92%E7%AE%97%E2%80%9D"> &ldquo;移动计算比移动数据更划算&rdquo; </a>
+</li>
+<li>
+<a href="#%E5%BC%82%E6%9E%84%E8%BD%AF%E7%A1%AC%E4%BB%B6%E5%B9%B3%E5%8F%B0%E9%97%B4%E7%9A%84%E5%8F%AF%E7%A7%BB%E6%A4%8D%E6%80%A7"> 异构软硬件平台间的可移植性 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Namenode+%E5%92%8C+Datanode"> Namenode 和 Datanode </a>
+</li>
+<li>
+<a href="#%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9F%E7%9A%84%E5%90%8D%E5%AD%97%E7%A9%BA%E9%97%B4+%28namespace%29"> 文件系统的名字空间 (namespace) </a>
+</li>
+<li>
+<a href="#%E6%95%B0%E6%8D%AE%E5%A4%8D%E5%88%B6"> 数据复制 </a>
+<ul class="minitoc">
+<li>
+<a href="#%E5%89%AF%E6%9C%AC%E5%AD%98%E6%94%BE%3A+%E6%9C%80%E6%9C%80%E5%BC%80%E5%A7%8B%E7%9A%84%E4%B8%80%E6%AD%A5"> 副本存放: 最最开始的一步 </a>
+</li>
+<li>
+<a href="#%E5%89%AF%E6%9C%AC%E9%80%89%E6%8B%A9"> 副本选择 </a>
+</li>
+<li>
+<a href="#%E5%AE%89%E5%85%A8%E6%A8%A1%E5%BC%8F"> 安全模式 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9F%E5%85%83%E6%95%B0%E6%8D%AE%E7%9A%84%E6%8C%81%E4%B9%85%E5%8C%96"> 文件系统元数据的持久化 </a>
+</li>
+<li>
+<a href="#%E9%80%9A%E8%AE%AF%E5%8D%8F%E8%AE%AE"> 通讯协议 </a>
+</li>
+<li>
+<a href="#%E5%81%A5%E5%A3%AE%E6%80%A7"> 健壮性 </a>
+<ul class="minitoc">
+<li>
+<a href="#%E7%A3%81%E7%9B%98%E6%95%B0%E6%8D%AE%E9%94%99%E8%AF%AF%EF%BC%8C%E5%BF%83%E8%B7%B3%E6%A3%80%E6%B5%8B%E5%92%8C%E9%87%8D%E6%96%B0%E5%A4%8D%E5%88%B6"> 磁盘数据错误,心跳检测和重新复制 </a>
+</li>
+<li>
+<a href="#%E9%9B%86%E7%BE%A4%E5%9D%87%E8%A1%A1"> 集群均衡 </a>
+</li>
+<li>
+<a href="#%E6%95%B0%E6%8D%AE%E5%AE%8C%E6%95%B4%E6%80%A7"> 数据完整性 </a>
+</li>
+<li>
+<a href="#%E5%85%83%E6%95%B0%E6%8D%AE%E7%A3%81%E7%9B%98%E9%94%99%E8%AF%AF"> 元数据磁盘错误 </a>
+</li>
+<li>
+<a href="#%E5%BF%AB%E7%85%A7"> 快照 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E6%95%B0%E6%8D%AE%E7%BB%84%E7%BB%87"> 数据组织 </a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%95%B0%E6%8D%AE%E5%9D%97"> 数据块 </a>
+</li>
+<li>
+<a href="#Staging"> Staging </a>
+</li>
+<li>
+<a href="#%E6%B5%81%E6%B0%B4%E7%BA%BF%E5%A4%8D%E5%88%B6"> 流水线复制 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E5%8F%AF%E8%AE%BF%E9%97%AE%E6%80%A7"> 可访问性 </a>
+<ul class="minitoc">
+<li>
+<a href="#DFSShell"> DFSShell </a>
+</li>
+<li>
+<a href="#DFSAdmin"> DFSAdmin </a>
+</li>
+<li>
+<a href="#%E6%B5%8F%E8%A7%88%E5%99%A8%E6%8E%A5%E5%8F%A3"> 浏览器接口 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E5%AD%98%E5%82%A8%E7%A9%BA%E9%97%B4%E5%9B%9E%E6%94%B6"> 存储空间回收 </a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%96%87%E4%BB%B6%E7%9A%84%E5%88%A0%E9%99%A4%E5%92%8C%E6%81%A2%E5%A4%8D"> 文件的删除和恢复 </a>
+</li>
+<li>
+<a href="#%E5%87%8F%E5%B0%91%E5%89%AF%E6%9C%AC%E7%B3%BB%E6%95%B0"> 减少副本系数 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E5%8F%82%E8%80%83%E8%B5%84%E6%96%99"> 参考资料 </a>
+</li>
+</ul>
+</div>
+    
+<a name="N10014"></a><a name="%E5%BC%95%E8%A8%80"></a>
+<h2 class="h3"> 引言 </h2>
+<div class="section">
+<p>
+	      Hadoop分布式文件系统(<acronym title="Hadoop分布式文件系统">HDFS</acronym>)被设计成适合运行在通用硬件(commodity hardware)上的分布式文件系统。它和现有的分布式文件系统有很多共同点。但同时,它和其他的分布式文件系统的区别也是很明显的。HDFS是一个高度容错性的系统,适合部署在廉价的机器上。HDFS能提供高吞吐量的数据访问,非常适合大规模数据集上的应用。HDFS放宽了一部分POSIX约束,来实现流式读取文件系统数据的目的。HDFS在最开始是作为Apache Nutch搜索引擎项目的基础架构而开发的。HDFS是Apache Hadoop Core项目的一部分。这个项目的地址是<a href="http://hadoop.apache.org/core/">http://hadoop.apache.org/core/</a>。
+      </p>
+</div>
+
+    
+<a name="N10026"></a><a name="%E5%89%8D%E6%8F%90%E5%92%8C%E8%AE%BE%E8%AE%A1%E7%9B%AE%E6%A0%87"></a>
+<h2 class="h3"> 前提和设计目标 </h2>
+<div class="section">
+<a name="N1002C"></a><a name="%E7%A1%AC%E4%BB%B6%E9%94%99%E8%AF%AF"></a>
+<h3 class="h4"> 硬件错误 </h3>
+<p>
+	硬件错误是常态而不是异常。HDFS可能由成百上千的服务器所构成,每个服务器上存储着文件系统的部分数据。我们面对的现实是构成系统的组件数目是巨大的,而且任一组件都有可能失效,这意味着总是有一部分HDFS的组件是不工作的。因此错误检测和快速、自动的恢复是HDFS最核心的架构目标。
+       </p>
+<a name="N10036"></a><a name="%E6%B5%81%E5%BC%8F%E6%95%B0%E6%8D%AE%E8%AE%BF%E9%97%AE"></a>
+<h3 class="h4"> 流式数据访问 </h3>
+<p>
+运行在HDFS上的应用和普通的应用不同,需要流式访问它们的数据集。HDFS的设计中更多的考虑到了数据批处理,而不是用户交互处理。比之数据访问的低延迟问题,更关键的在于数据访问的高吞吐量。POSIX标准设置的很多硬性约束对HDFS应用系统不是必需的。为了提高数据的吞吐量,在一些关键方面对POSIX的语义做了一些修改。        
+        </p>
+<a name="N10040"></a><a name="%E5%A4%A7%E8%A7%84%E6%A8%A1%E6%95%B0%E6%8D%AE%E9%9B%86"></a>
+<h3 class="h4"> 大规模数据集 </h3>
+<p>
+        运行在HDFS上的应用具有很大的数据集。HDFS上的一个典型文件大小一般都在G字节至T字节。因此,HDFS被调节以支持大文件存储。它应该能提供整体上高的数据传输带宽,能在一个集群里扩展到数百个节点。一个单一的HDFS实例应该能支撑数以千万计的文件。
+        </p>
+<a name="N1004A"></a><a name="%E7%AE%80%E5%8D%95%E7%9A%84%E4%B8%80%E8%87%B4%E6%80%A7%E6%A8%A1%E5%9E%8B"></a>
+<h3 class="h4"> 简单的一致性模型 </h3>
+<p>
+        HDFS应用需要一个&ldquo;一次写入多次读取&rdquo;的文件访问模型。一个文件经过创建、写入和关闭之后就不需要改变。这一假设简化了数据一致性问题,并且使高吞吐量的数据访问成为可能。Map/Reduce应用或者网络爬虫应用都非常适合这个模型。目前还有计划在将来扩充这个模型,使之支持文件的附加写操作。 
+        </p>
+<a name="N10058"></a><a name="%E2%80%9C%E7%A7%BB%E5%8A%A8%E8%AE%A1%E7%AE%97%E6%AF%94%E7%A7%BB%E5%8A%A8%E6%95%B0%E6%8D%AE%E6%9B%B4%E5%88%92%E7%AE%97%E2%80%9D"></a>
+<h3 class="h4"> &ldquo;移动计算比移动数据更划算&rdquo; </h3>
+<p>
+        一个应用请求的计算,离它操作的数据越近就越高效,在数据达到海量级别的时候更是如此。因为这样就能降低网络阻塞的影响,提高系统数据的吞吐量。将计算移动到数据附近,比之将数据移动到应用所在显然更好。HDFS为应用提供了将它们自己移动到数据附近的接口。 
+        </p>
+<a name="N10062"></a><a name="%E5%BC%82%E6%9E%84%E8%BD%AF%E7%A1%AC%E4%BB%B6%E5%B9%B3%E5%8F%B0%E9%97%B4%E7%9A%84%E5%8F%AF%E7%A7%BB%E6%A4%8D%E6%80%A7"></a>
+<h3 class="h4"> 异构软硬件平台间的可移植性 </h3>
+<p>
+        HDFS在设计的时候就考虑到平台的可移植性。这种特性方便了HDFS作为大规模数据应用平台的推广。
+        </p>
+</div>
+
+ 
+    
+<a name="N1006D"></a><a name="Namenode+%E5%92%8C+Datanode"></a>
+<h2 class="h3"> Namenode 和 Datanode </h2>
+<div class="section">
+<p>
+      HDFS采用master/slave架构。一个HDFS集群是由一个Namenode和一定数目的Datanodes组成。Namenode是一个中心服务器,负责管理文件系统的名字空间(namespace)以及客户端对文件的访问。集群中的Datanode一般是一个节点一个,负责管理它所在节点上的存储。HDFS暴露了文件系统的名字空间,用户能够以文件的形式在上面存储数据。从内部看,一个文件其实被分成一个或多个数据块,这些块存储在一组Datanode上。Namenode执行文件系统的名字空间操作,比如打开、关闭、重命名文件或目录。它也负责确定数据块到具体Datanode节点的映射。Datanode负责处理文件系统客户端的读写请求。在Namenode的统一调度下进行数据块的创建、删除和复制。
+      </p>
+<div id="" style="text-align: center;">
+<img id="" class="figure" alt="HDFS 架构" src="images/hdfsarchitecture.gif"></div>
+<p>
+      Namenode和Datanode被设计成可以在普通的商用机器上运行。这些机器一般运行着GNU/Linux操作系统(<acronym title="操作系统">OS</acronym>)。HDFS采用Java语言开发,因此任何支持Java的机器都可以部署Namenode或Datanode。由于采用了可移植性极强的Java语言,使得HDFS可以部署到多种类型的机器上。一个典型的部署场景是一台机器上只运行一个Namenode实例,而集群中的其它机器分别运行一个Datanode实例。这种架构并不排斥在一台机器上运行多个Datanode,只不过这样的情况比较少见。
+      </p>
+<p>
+      集群中单一Namenode的结构大大简化了系统的架构。Namenode是所有HDFS元数据的仲裁者和管理者,这样,用户数据永远不会流过Namenode。
+      </p>
+</div> 
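+<p>
+      As a small, hedged illustration of the client side of this architecture: an
+      application never addresses Datanodes directly. It obtains a FileSystem handle whose
+      fs.default.name points at the Namenode; namespace operations go to the Namenode,
+      while block data, when read, comes from Datanodes. The address below is hypothetical.
+      </p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class ListRoot {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    conf.set("fs.default.name", "hdfs://namenode.example.com:9000"); // hypothetical Namenode
+    FileSystem fs = FileSystem.get(conf);   // namespace RPCs go to the Namenode
+    for (FileStatus stat : fs.listStatus(new Path("/"))) {
+      System.out.println(stat.getPath());
+    }
+  }
+}
+</pre>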
+
+    
+<a name="N10089"></a><a name="%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9F%E7%9A%84%E5%90%8D%E5%AD%97%E7%A9%BA%E9%97%B4+%28namespace%29"></a>
+<h2 class="h3"> 文件系统的名字空间 (namespace) </h2>
+<div class="section">
+<p>
+      HDFS支持传统的层次型文件组织结构。用户或者应用程序可以创建目录,然后将文件保存在这些目录里。文件系统名字空间的层次结构和大多数现有的文件系统类似:用户可以创建、删除、移动或重命名文件。当前,HDFS不支持用户磁盘配额和访问权限控制,也不支持硬链接和软链接。但是HDFS架构并不妨碍实现这些特性。
+      </p>
+<p>
+      Namenode负责维护文件系统的名字空间,任何对文件系统名字空间或属性的修改都将被Namenode记录下来。应用程序可以设置HDFS保存的文件的副本数目。文件副本的数目称为文件的副本系数,这个信息也是由Namenode保存的。
+      </p>
+</div>
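+<p>
+      For illustration, the hierarchical namespace operations described above map directly
+      onto the FileSystem API. A hedged sketch with hypothetical paths:
+      </p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class NamespaceOps {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    Path dir = new Path("/user/hadoop/reports");           // hypothetical directory
+    fs.mkdirs(dir);                                        // create a directory
+    fs.rename(dir, new Path("/user/hadoop/reports.old"));  // rename within the namespace
+    fs.delete(new Path("/user/hadoop/reports.old"), true); // recursive delete
+  }
+}
+</pre>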
+
+    
+<a name="N10096"></a><a name="%E6%95%B0%E6%8D%AE%E5%A4%8D%E5%88%B6"></a>
+<h2 class="h3"> 数据复制 </h2>
+<div class="section">
+<p>
+      HDFS被设计成能够在一个大集群中跨机器可靠地存储超大文件。它将每个文件存储成一系列的数据块,除了最后一个,所有的数据块都是同样大小的。为了容错,文件的所有数据块都会有副本。每个文件的数据块大小和副本系数都是可配置的。应用程序可以指定某个文件的副本数目。副本系数可以在文件创建的时候指定,也可以在之后改变。HDFS中的文件都是一次性写入的,并且严格要求在任何时候只能有一个写入者。 
+      </p>
+<p>
+      Namenode全权管理数据块的复制,它周期性地从集群中的每个Datanode接收心跳信号和块状态报告(Blockreport)。接收到心跳信号意味着该Datanode节点工作正常。块状态报告包含了一个该Datanode上所有数据块的列表。
+    </p>
+<div id="" style="text-align: center;">
+<img id="" class="figure" alt="HDFS Datanodes" src="images/hdfsdatanodes.gif"></div>
+<a name="N100A6"></a><a name="%E5%89%AF%E6%9C%AC%E5%AD%98%E6%94%BE%3A+%E6%9C%80%E6%9C%80%E5%BC%80%E5%A7%8B%E7%9A%84%E4%B8%80%E6%AD%A5"></a>
+<h3 class="h4"> 副本存放: 最最开始的一步 </h3>
+<p>
+        副本的存放是HDFS可靠性和性能的关键。优化的副本存放策略是HDFS区分于其他大部分分布式文件系统的重要特性。这种特性需要做大量的调优,并需要经验的积累。HDFS采用一种称为机架感知(rack-aware)的策略来改进数据的可靠性、可用性和网络带宽的利用率。目前实现的副本存放策略只是在这个方向上的第一步。实现这个策略的短期目标是验证它在生产环境下的有效性,观察它的行为,为实现更先进的策略打下测试和研究的基础。 
+        </p>
+<p>
+	大型HDFS实例一般运行在跨越多个机架的计算机组成的集群上,不同机架上的两台机器之间的通讯需要经过交换机。在大多数情况下,同一个机架内的两台机器间的带宽会比不同机架的两台机器间的带宽大。        
+        </p>
+<p>
+        通过一个<a href="cluster_setup.html#Hadoop%E7%9A%84%E6%9C%BA%E6%9E%B6%E6%84%9F%E7%9F%A5">机架感知</a>的过程,Namenode可以确定每个Datanode所属的机架id。一个简单但没有优化的策略就是将副本存放在不同的机架上。这样可以有效防止当整个机架失效时数据的丢失,并且允许读数据的时候充分利用多个机架的带宽。这种策略设置可以将副本均匀分布在集群中,有利于当组件失效情况下的负载均衡。但是,因为这种策略的一个写操作需要传输数据块到多个机架,这增加了写的代价。 
+        </p>
+<p>
+        在大多数情况下,副本系数是3,HDFS的存放策略是将一个副本存放在本地机架的节点上,一个副本放在同一机架的另一个节点上,最后一个副本放在不同机架的节点上。这种策略减少了机架间的数据传输,这就提高了写操作的效率。机架的错误远远比节点的错误少,所以这个策略不会影响到数据的可靠性和可用性。于此同时,因为数据块只放在两个(不是三个)不同的机架上,所以此策略减少了读取数据时需要的网络传输总带宽。在这种策略下,副本并不是均匀分布在不同的机架上。三分之一的副本在一个节点上,三分之二的副本在一个机架上,其他副本均匀分布在剩下的机架中,这一策略在不损害数据可靠性和读取性能的情况下改进了写的性能。
+        </p>
+<p>
+        当前,这里介绍的默认副本存放策略正在开发的过程中。
+        </p>
+<a name="N100C0"></a><a name="%E5%89%AF%E6%9C%AC%E9%80%89%E6%8B%A9"></a>
+<h3 class="h4"> 副本选择 </h3>
+<p>
+        为了降低整体的带宽消耗和读取延时,HDFS会尽量让读取程序读取离它最近的副本。如果在读取程序的同一个机架上有一个副本,那么就读取该副本。如果一个HDFS集群跨越多个数据中心,那么客户端也将首先读本地数据中心的副本。
+        </p>
+<a name="N100CA"></a><a name="%E5%AE%89%E5%85%A8%E6%A8%A1%E5%BC%8F"></a>
+<h3 class="h4"> 安全模式 </h3>
+<p>
+	Namenode启动后会进入一个称为安全模式的特殊状态。处于安全模式的Namenode是不会进行数据块的复制的。Namenode从所有的 Datanode接收心跳信号和块状态报告。块状态报告包括了某个Datanode所有的数据块列表。每个数据块都有一个指定的最小副本数。当Namenode检测确认某个数据块的副本数目达到这个最小值,那么该数据块就会被认为是副本安全(safely replicated)的;在一定百分比(这个参数可配置)的数据块被Namenode检测确认是安全之后(加上一个额外的30秒等待时间),Namenode将退出安全模式状态。接下来它会确定还有哪些数据块的副本没有达到指定数目,并将这些数据块复制到其他Datanode上。
+        </p>
+</div>
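+<p>
+      Since the replication factor is per file and can be changed after creation, a client
+      can adjust it through the FileSystem API. A small sketch; the path and factor below
+      are hypothetical:
+      </p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class AdjustReplication {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    Path file = new Path("/user/hadoop/data/part-0");   // hypothetical file
+    fs.setReplication(file, (short) 5);                 // ask the Namenode for 5 replicas
+    short actual = fs.getFileStatus(file).getReplication();
+    System.out.println("replication factor now " + actual);
+  }
+}
+</pre>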
+
+    
+<a name="N100D5"></a><a name="%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9F%E5%85%83%E6%95%B0%E6%8D%AE%E7%9A%84%E6%8C%81%E4%B9%85%E5%8C%96"></a>
+<h2 class="h3"> 文件系统元数据的持久化 </h2>
+<div class="section">
+<p>
+	Namenode上保存着HDFS的名字空间。对于任何对文件系统元数据产生修改的操作,Namenode都会使用一种称为EditLog的事务日志记录下来。例如,在HDFS中创建一个文件,Namenode就会在Editlog中插入一条记录来表示;同样地,修改文件的副本系数也将往Editlog插入一条记录。Namenode在本地操作系统的文件系统中存储这个Editlog。整个文件系统的名字空间,包括数据块到文件的映射、文件的属性等,都存储在一个称为FsImage的文件中,这个文件也是放在Namenode所在的本地文件系统上。
+        </p>
+<p>
+        Namenode在内存中保存着整个文件系统的名字空间和文件数据块映射(Blockmap)的映像。这个关键的元数据结构设计得很紧凑,因而一个有4G内存的Namenode足够支撑大量的文件和目录。当Namenode启动时,它从硬盘中读取Editlog和FsImage,将所有Editlog中的事务作用在内存中的FsImage上,并将这个新版本的FsImage从内存中保存到本地磁盘上,然后删除旧的Editlog,因为这个旧的Editlog的事务都已经作用在FsImage上了。这个过程称为一个检查点(checkpoint)。在当前实现中,检查点只发生在Namenode启动时,在不久的将来将实现支持周期性的检查点。
+        </p>
+<p>
+	Datanode将HDFS数据以文件的形式存储在本地的文件系统中,它并不知道有关HDFS文件的信息。它把每个HDFS数据块存储在本地文件系统的一个单独的文件中。Datanode并不在同一个目录创建所有的文件,实际上,它用试探的方法来确定每个目录的最佳文件数目,并且在适当的时候创建子目录。在同一个目录中创建所有的本地文件并不是最优的选择,这是因为本地文件系统可能无法高效地在单个目录中支持大量的文件。当一个Datanode启动时,它会扫描本地文件系统,产生一个这些本地文件对应的所有HDFS数据块的列表,然后作为报告发送到Namenode,这个报告就是块状态报告。         
+        </p>
+</div>
+
+    
+<a name="N100E5"></a><a name="%E9%80%9A%E8%AE%AF%E5%8D%8F%E8%AE%AE"></a>
+<h2 class="h3"> 通讯协议 </h2>
+<div class="section">
+<p>
+      所有的HDFS通讯协议都是建立在TCP/IP协议之上。客户端通过一个可配置的<acronym title="Transmission Control Protocol">TCP</acronym>端口连接到Namenode,通过ClientProtocol协议与Namenode交互。而Datanode使用DatanodeProtocol协议与Namenode交互。一个远程过程调用(<acronym title="Remote Procedure Call">RPC</acronym>)模型被抽象出来封装ClientProtocol和Datanodeprotocol协议。在设计上,Namenode不会主动发起RPC,而是响应来自客户端或 Datanode 的RPC请求。 
+      </p>
+</div> 
+
+    
+<a name="N100F7"></a><a name="%E5%81%A5%E5%A3%AE%E6%80%A7"></a>
+<h2 class="h3"> 健壮性 </h2>
+<div class="section">
+<p>
+	      HDFS的主要目标就是即使在出错的情况下也要保证数据存储的可靠性。常见的三种出错情况是:Namenode出错, Datanode出错和网络割裂(network partitions)。
+      </p>
+<a name="N10100"></a><a name="%E7%A3%81%E7%9B%98%E6%95%B0%E6%8D%AE%E9%94%99%E8%AF%AF%EF%BC%8C%E5%BF%83%E8%B7%B3%E6%A3%80%E6%B5%8B%E5%92%8C%E9%87%8D%E6%96%B0%E5%A4%8D%E5%88%B6"></a>
+<h3 class="h4"> 磁盘数据错误,心跳检测和重新复制 </h3>
+<p>
+        每个Datanode节点周期性地向Namenode发送心跳信号。网络割裂可能导致一部分Datanode跟Namenode失去联系。Namenode通过心跳信号的缺失来检测这一情况,并将这些近期不再发送心跳信号的Datanode标记为宕机,不会再将新的<acronym title="Input/Output">IO</acronym>请求发给它们。任何存储在宕机Datanode上的数据将不再有效。Datanode的宕机可能会引起一些数据块的副本系数低于指定值,Namenode不断地检测这些需要复制的数据块,一旦发现就启动复制操作。在下列情况下,可能需要重新复制:某个Datanode节点失效,某个副本遭到损坏,Datanode上的硬盘错误,或者文件的副本系数增大。
+        </p>
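+<p>下面的Java示意代码演示根据最近一次心跳时间判定Datanode是否宕机的逻辑。超时阈值为假设值,并非Hadoop的实际配置。</p>
+<pre class="code">
+import java.util.HashMap;
+import java.util.Map;
+
+// 示意:根据最近一次心跳时间判定Datanode是否宕机;超时阈值为假设值
+public class HeartbeatMonitorSketch {
+    static final long TIMEOUT_MS = 10 * 60 * 1000L;   // 假设:10分钟无心跳即判定宕机
+
+    public static void main(String[] args) {
+        Map&lt;String, Long&gt; lastHeartbeat = new HashMap&lt;String, Long&gt;();
+        long now = System.currentTimeMillis();
+        lastHeartbeat.put("dn1", now - 5 * 1000L);         // 5秒前有心跳:正常
+        lastHeartbeat.put("dn2", now - 11 * 60 * 1000L);   // 11分钟无心跳:判定宕机
+
+        for (Map.Entry&lt;String, Long&gt; e : lastHeartbeat.entrySet()) {
+            boolean dead = now - e.getValue() &gt; TIMEOUT_MS;
+            // 宕机节点不再接收IO请求,其上数据块的副本数随后会被检查并触发重新复制
+            System.out.println(e.getKey() + (dead ? " -&gt; dead" : " -&gt; alive"));
+        }
+    }
+}
+</pre>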
+<a name="N1010E"></a><a name="%E9%9B%86%E7%BE%A4%E5%9D%87%E8%A1%A1"></a>
+<h3 class="h4"> 集群均衡 </h3>
+<p>
+        HDFS的架构支持数据均衡策略。如果某个Datanode节点上的空闲空间低于特定的临界点,按照均衡策略,系统就会自动地将数据从这个Datanode移动到其他空闲的Datanode。如果对某个文件的请求突然增加,系统也可能启动一个计划来创建该文件的新副本,并且同时重新平衡集群中的其他数据。这些均衡策略目前还没有实现。
+        </p>
+<a name="N10118"></a><a name="%E6%95%B0%E6%8D%AE%E5%AE%8C%E6%95%B4%E6%80%A7"></a>
+<h3 class="h4"> 数据完整性 </h3>
+<p>
+        <!-- XXX "checksum checking" sounds funny -->
+        从某个Datanode获取的数据块有可能是损坏的,损坏可能是由Datanode的存储设备错误、网络错误或者软件bug造成的。HDFS客户端软件实现了对HDFS文件内容的校验和(checksum)检查。当客户端创建一个新的HDFS文件,会计算这个文件每个数据块的校验和,并将校验和作为一个单独的隐藏文件保存在同一个HDFS名字空间下。当客户端获取文件内容后,它会检验从Datanode获取的数据跟相应的校验和文件中的校验和是否匹配,如果不匹配,客户端可以选择从其他Datanode获取该数据块的副本。
+        </p>
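+<p>下面用一段可运行的Java代码示意这一校验过程:写入时计算校验和并保存,读取时重新计算并比较。示例采用CRC32,实际实现细节可能不同。</p>
+<pre class="code">
+import java.util.zip.CRC32;
+
+// 示意:写入时计算数据块校验和,读取时重新计算并比较
+public class ChecksumSketch {
+    static long checksum(byte[] data) {
+        CRC32 crc = new CRC32();
+        crc.update(data);
+        return crc.getValue();
+    }
+
+    public static void main(String[] args) {
+        byte[] block = "example block data".getBytes();
+        long stored = checksum(block);   // 创建文件时保存到同一名字空间下的隐藏校验和文件
+        byte[] received = block.clone();
+        received[0] ^= 1;                // 模拟传输或存储过程中的数据损坏
+        // 不匹配时,客户端可以改从其他Datanode读取该数据块的副本
+        System.out.println("match = " + (checksum(received) == stored));
+    }
+}
+</pre>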
+<a name="N10124"></a><a name="%E5%85%83%E6%95%B0%E6%8D%AE%E7%A3%81%E7%9B%98%E9%94%99%E8%AF%AF"></a>
+<h3 class="h4"> 元数据磁盘错误 </h3>
+<p>
+        FsImage和Editlog是HDFS的核心数据结构。如果这些文件损坏了,整个HDFS实例都将失效。因而,Namenode可以配置成支持维护多个FsImage和Editlog的副本。任何对FsImage或者Editlog的修改,都将同步到它们的副本上。这种多副本的同步操作可能会降低Namenode每秒处理的名字空间事务数量。然而这个代价是可以接受的,因为即使HDFS的应用是数据密集的,它们也不是元数据密集型的。当Namenode重启的时候,它会选取最近的完整的FsImage和Editlog来使用。
+        </p>
+<p> 
+        Namenode是HDFS集群中的单点故障(single point of failure)所在。如果Namenode机器发生故障,就需要手工干预。目前,自动重启或在另一台机器上做Namenode故障转移的功能还没有实现。
+        </p>
+<a name="N10131"></a><a name="%E5%BF%AB%E7%85%A7"></a>
+<h3 class="h4"> 快照 </h3>
+<p>
+        快照支持某一特定时刻的数据的复制备份。利用快照,可以让HDFS在数据损坏时恢复到过去一个已知正确的时间点。HDFS目前还不支持快照功能,但计划在将来的版本进行支持。
+        </p>
+</div>
+    
+<a name="N1013C"></a><a name="%E6%95%B0%E6%8D%AE%E7%BB%84%E7%BB%87"></a>
+<h2 class="h3"> 数据组织 </h2>
+<div class="section">
+<a name="N10144"></a><a name="%E6%95%B0%E6%8D%AE%E5%9D%97"></a>
+<h3 class="h4"> 数据块 </h3>
+<p>
+        HDFS被设计成支持大文件,适用HDFS的是那些需要处理大规模的数据集的应用。这些应用都是只写入数据一次,但却读取一次或多次,并且读取速度应能满足流式读取的需要。HDFS支持文件的&ldquo;一次写入多次读取&rdquo;语义。一个典型的数据块大小是64MB。因而,HDFS中的文件总是按照64MB被切分成不同的块,每个块尽可能地存储于不同的Datanode中。
+        </p>
+<a name="N1014E"></a><a name="Staging"></a>
+<h3 class="h4"> Staging </h3>
+<p>
+        客户端创建文件的请求其实并没有立即发送给Namenode,事实上,在刚开始阶段HDFS客户端会先将文件数据缓存到本地的一个临时文件。应用程序的写操作被透明地重定向到这个临时文件。当这个临时文件累积的数据量超过一个数据块的大小,客户端才会联系Namenode。Namenode将文件名插入文件系统的层次结构中,并且分配一个数据块给它。然后返回Datanode的标识符和目标数据块给客户端。接着客户端将这块数据从本地临时文件上传到指定的Datanode上。当文件关闭时,在临时文件中剩余的没有上传的数据也会传输到指定的Datanode上。然后客户端告诉Namenode文件已经关闭。此时Namenode才将文件创建操作提交到日志里进行存储。如果Namenode在文件关闭前宕机了,则该文件将丢失。
+        </p>
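+<p>下面的Java示意代码模拟这一暂存过程:写操作先进入本地临时文件,只有累积满一个数据块(或文件关闭)时才联系Namenode。数据块大小取上文提到的64MB,其余细节为本文假设。</p>
+<pre class="code">
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+// 示意:客户端先写本地临时文件,累积满一个数据块才联系Namenode
+public class StagingSketch {
+    static final long BLOCK_SIZE = 64L * 1024 * 1024;   // 上文中的典型数据块大小64MB
+
+    public static void main(String[] args) throws IOException {
+        File tmp = File.createTempFile("hdfs-staging", ".blk");   // 本地临时文件
+        tmp.deleteOnExit();
+        OutputStream out = new BufferedOutputStream(new FileOutputStream(tmp));
+        long buffered = 0;
+        byte[] chunk = new byte[4096];
+        for (int i = 0; i &lt; 4; i++) {   // 应用的写操作被透明地重定向到临时文件
+            out.write(chunk);
+            buffered += chunk.length;
+        }
+        out.close();
+        if (buffered &gt;= BLOCK_SIZE) {
+            // 此时才向Namenode申请数据块,并把临时文件上传到返回的Datanode
+            System.out.println("contact Namenode and upload " + buffered + " bytes");
+        } else {
+            // 文件关闭时,把剩余数据上传,并通知Namenode提交文件创建操作
+            System.out.println("on close: flush remaining " + buffered + " bytes");
+        }
+    }
+}
+</pre>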
+<p>
+        上述暂存方法是对在HDFS上运行的目标应用进行认真考虑后得到的结果。这些应用需要进行文件的流式写入。如果不采用客户端缓存,网络速度和网络堵塞会对吞吐量造成比较大的影响。这种方法并不是没有先例的,早期的文件系统,比如<acronym title="Andrew File System">AFS</acronym>,就用客户端缓存来提高性能。为了达到更高的数据上传效率,HDFS放松了POSIX标准的部分要求。
+        </p>
+<a name="N10161"></a><a name="%E6%B5%81%E6%B0%B4%E7%BA%BF%E5%A4%8D%E5%88%B6"></a>
+<h3 class="h4"> 流水线复制 </h3>
+<p>
+        当客户端向HDFS文件写入数据的时候,一开始是写到本地临时文件中。假设该文件的副本系数设置为3,当本地临时文件累积到一个数据块的大小时,客户端会从Namenode获取一个Datanode列表用于存放副本。然后客户端开始向第一个Datanode传输数据,第一个Datanode一小部分一小部分(4 KB)地接收数据,将每一部分写入本地仓库,并同时传输该部分到列表中第二个Datanode节点。第二个Datanode也是这样,一小部分一小部分地接收数据,写入本地仓库,并同时传给第三个Datanode。最后,第三个Datanode接收数据并存储在本地。因此,Datanode能流水线式地从前一个节点接收数据,并在同时转发给下一个节点,数据以流水线的方式从前一个Datanode复制到下一个。
+        </p>
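+<p>下面是一段示意性的Java代码,演示数据按4KB分片沿Datanode列表流水线式转发的过程。节点列表和分片大小取自上文描述,实现细节为本文假设。</p>
+<pre class="code">
+import java.util.Arrays;
+import java.util.List;
+
+// 示意:数据按4KB分片沿Datanode列表流水线式地接收、落盘并转发
+public class PipelineSketch {
+    static void receive(List&lt;String&gt; pipeline, int index, byte[] portion) {
+        System.out.println(pipeline.get(index) + ": store " + portion.length + " bytes");
+        if (index + 1 &lt; pipeline.size())
+            receive(pipeline, index + 1, portion);   // 同时转发给列表中的下一个Datanode
+    }
+
+    public static void main(String[] args) {
+        List&lt;String&gt; pipeline = Arrays.asList("dn1", "dn2", "dn3"); // Namenode返回的节点列表
+        byte[] block = new byte[10 * 1024];
+        for (int off = 0; off &lt; block.length; off += 4096) {        // 按4KB分片传输
+            int len = Math.min(4096, block.length - off);
+            receive(pipeline, 0, Arrays.copyOfRange(block, off, off + len));
+        }
+    }
+}
+</pre>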
+</div>
+
+    
+<a name="N1016C"></a><a name="%E5%8F%AF%E8%AE%BF%E9%97%AE%E6%80%A7"></a>
+<h2 class="h3"> 可访问性 </h2>
+<div class="section">
+<p>
+      HDFS给应用提供了多种访问方式。用户可以通过<a href="http://hadoop.apache.org/core/docs/current/api/">Java API</a>接口访问,也可以通过C语言的封装API访问,还可以通过浏览器的方式访问HDFS中的文件。通过<acronym title="Web-based Distributed Authoring and Versioning">WebDAV</acronym>协议访问的方式正在开发中。
+      </p>
+<a name="N10181"></a><a name="DFSShell"></a>
+<h3 class="h4"> DFSShell </h3>
+<p>
+        HDFS以文件和目录的形式组织用户数据。它提供了一个命令行的接口(DFSShell)让用户与HDFS中的数据进行交互。命令的语法和用户熟悉的其他shell(例如 bash, csh)工具类似。下面是一些动作/命令的示例:
+        </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+            
+<th colspan="1" rowspan="1"> 动作 </th><th colspan="1" rowspan="1"> 命令 </th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1"> 创建一个名为 <span class="codefrag">/foodir</span> 的目录 </td> <td colspan="1" rowspan="1"> <span class="codefrag">bin/hadoop dfs -mkdir /foodir</span> </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1"> 删除一个名为 <span class="codefrag">/foodir</span> 的目录 </td> <td colspan="1" rowspan="1"> <span class="codefrag">bin/hadoop dfs -rmr /foodir</span> </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1"> 查看名为 <span class="codefrag">/foodir/myfile.txt</span> 的文件内容 </td> <td colspan="1" rowspan="1"> <span class="codefrag">bin/hadoop dfs -cat /foodir/myfile.txt</span> </td>
+          
+</tr>
+        
+</table>
+<p>
+        DFSShell 可以用在那些通过脚本语言和文件系统进行交互的应用程序上。
+        </p>
+<a name="N101D6"></a><a name="DFSAdmin"></a>
+<h3 class="h4"> DFSAdmin </h3>
+<p>
+		DFSAdmin 命令用来管理HDFS集群。这些命令只有HDFS的管理员才能使用。下面是一些动作/命令的示例:
+        </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+            
+<th colspan="1" rowspan="1"> 动作 </th><th colspan="1" rowspan="1"> 命令 </th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1"> 将集群置于安全模式 </td> <td colspan="1" rowspan="1"> <span class="codefrag">bin/hadoop dfsadmin -safemode enter</span> </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1"> 显示Datanode列表 </td> <td colspan="1" rowspan="1"> <span class="codefrag">bin/hadoop dfsadmin -report</span> </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1"> 使Datanode节点 <span class="codefrag">datanodename</span>退役</td><td colspan="1" rowspan="1"> <span class="codefrag">bin/hadoop dfsadmin -decommission datanodename</span> </td>
+          
+</tr>
+        
+</table>
+<a name="N10221"></a><a name="%E6%B5%8F%E8%A7%88%E5%99%A8%E6%8E%A5%E5%8F%A3"></a>
+<h3 class="h4"> 浏览器接口 </h3>
+<p>
+	一个典型的HDFS安装会在一个可配置的TCP端口开启一个Web服务器用于暴露HDFS的名字空间。用户可以用浏览器来浏览HDFS的名字空间和查看文件的内容。
+       </p>
+</div> 
+
+    
+<a name="N1022C"></a><a name="%E5%AD%98%E5%82%A8%E7%A9%BA%E9%97%B4%E5%9B%9E%E6%94%B6"></a>
+<h2 class="h3"> 存储空间回收 </h2>
+<div class="section">
+<a name="N10232"></a><a name="%E6%96%87%E4%BB%B6%E7%9A%84%E5%88%A0%E9%99%A4%E5%92%8C%E6%81%A2%E5%A4%8D"></a>
+<h3 class="h4"> 文件的删除和恢复 </h3>
+<p>
+       当用户或应用程序删除某个文件时,这个文件并没有立刻从HDFS中删除。实际上,HDFS会将这个文件重命名转移到<span class="codefrag">/trash</span>目录。只要文件还在<span class="codefrag">/trash</span>目录中,该文件就可以被迅速地恢复。文件在<span class="codefrag">/trash</span>中保存的时间是可配置的,当超过这个时间时,Namenode就会将该文件从名字空间中删除。删除文件会使得该文件相关的数据块被释放。注意,从用户删除文件到HDFS空闲空间的增加之间会有一定时间的延迟。</p>
+<p>
+只要被删除的文件还在<span class="codefrag">/trash</span>目录中,用户就可以恢复这个文件。如果用户想恢复被删除的文件,他/她可以浏览<span class="codefrag">/trash</span>目录找回该文件。<span class="codefrag">/trash</span>目录仅仅保存被删除文件的最后副本。<span class="codefrag">/trash</span>目录与其他的目录没有什么区别,除了一点:HDFS会在该目录上应用一个特殊策略来自动删除文件。目前的默认策略是删除<span class="codefrag">/trash</span>中保留时间超过6小时的文件。将来,这个策略可以通过一个定义良好的接口来配置。
+        </p>
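+<p>恢复文件通常就是把它从<span class="codefrag">/trash</span>移回原来的位置,例如用<span class="codefrag">bin/hadoop fs -mv</span>命令。下面是一段等价的Java示意代码,其中的文件路径均为假设。</p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+// 示意:把/trash中的文件移回原目录以完成恢复(两个路径均为假设)
+public class RestoreFromTrash {
+    public static void main(String[] args) throws Exception {
+        FileSystem fs = FileSystem.get(new Configuration());
+        Path inTrash = new Path("/trash/user/hadoop/report.txt");
+        Path original = new Path("/user/hadoop/report.txt");
+        if (fs.rename(inTrash, original))   // 移出/trash后文件不会再被自动删除
+            System.out.println("restored: " + original);
+    }
+}
+</pre>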
+<a name="N10257"></a><a name="%E5%87%8F%E5%B0%91%E5%89%AF%E6%9C%AC%E7%B3%BB%E6%95%B0"></a>
+<h3 class="h4"> 减少副本系数 </h3>
+<p>
+        当一个文件的副本系数被减小后,Namenode会选择过剩的副本删除。下次心跳检测时会将该信息传递给Datanode。Datanode随即移除相应的数据块,集群中的空闲空间随之增加。同样,在调用<span class="codefrag">setReplication</span> API结束与集群空闲空间增加之间会有一定的延迟。</p>
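+<p>下面的Java片段演示如何调用上文提到的<span class="codefrag">setReplication</span> API来减小某个文件的副本系数;文件路径和目标副本数为假设。</p>
+<pre class="code">
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+// 示意:调用setReplication减小文件的副本系数(路径与目标值为假设)
+public class SetReplicationExample {
+    public static void main(String[] args) throws Exception {
+        FileSystem fs = FileSystem.get(new Configuration());
+        boolean ok = fs.setReplication(new Path("/user/hadoop/big.log"), (short) 2);
+        // 调用返回后,过剩副本由Namenode通过后续心跳通知Datanode删除,
+        // 所以集群空闲空间的增加会有一定延迟
+        System.out.println("setReplication accepted = " + ok);
+    }
+}
+</pre>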
+</div>
+
+
+    
+<a name="N10265"></a><a name="%E5%8F%82%E8%80%83%E8%B5%84%E6%96%99"></a>
+<h2 class="h3"> 参考资料 </h2>
+<div class="section">
+<p>
+      HDFS Java API: 
+      <a href="http://hadoop.apache.org/core/docs/current/api/"> 
+        http://hadoop.apache.org/core/docs/current/api/
+      </a>
+      
+</p>
+<p>
+      HDFS 源代码: 
+      <a href="http://hadoop.apache.org/core/version_control.html"> 
+        http://hadoop.apache.org/core/version_control.html
+      </a>
+      
+</p>
+</div> 
+
+  
+<p align="right">
+<font size="-2">by&nbsp;Dhruba Borthakur</font>
+</p>
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 424 - 0
docs/cn/hdfs_design.pdf


+ 504 - 0
docs/cn/hdfs_permissions_guide.html

@@ -0,0 +1,504 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>
+      HDFS权限管理用户指南
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">HDFS权限指南</div>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hdfs_permissions_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>
+      HDFS权限管理用户指南
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E6%A6%82%E8%BF%B0">概述</a>
+</li>
+<li>
+<a href="#%E7%94%A8%E6%88%B7%E8%BA%AB%E4%BB%BD">用户身份</a>
+</li>
+<li>
+<a href="#%E7%90%86%E8%A7%A3%E7%B3%BB%E7%BB%9F%E7%9A%84%E5%AE%9E%E7%8E%B0">理解系统的实现</a>
+</li>
+<li>
+<a href="#%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9FAPI%E5%8F%98%E6%9B%B4">文件系统API变更</a>
+</li>
+<li>
+<a href="#Shell%E5%91%BD%E4%BB%A4%E5%8F%98%E6%9B%B4">Shell命令变更</a>
+</li>
+<li>
+<a href="#%E8%B6%85%E7%BA%A7%E7%94%A8%E6%88%B7">超级用户</a>
+</li>
+<li>
+<a href="#Web%E6%9C%8D%E5%8A%A1%E5%99%A8">Web服务器</a>
+</li>
+<li>
+<a href="#%E5%9C%A8%E7%BA%BF%E5%8D%87%E7%BA%A7">在线升级</a>
+</li>
+<li>
+<a href="#%E9%85%8D%E7%BD%AE%E5%8F%82%E6%95%B0">配置参数</a>
+</li>
+</ul>
+</div>
+    
+<a name="N1000D"></a><a name="%E6%A6%82%E8%BF%B0"></a>
+<h2 class="h3">概述</h2>
+<div class="section">
+<p>
+		Hadoop分布式文件系统实现了一个和POSIX系统类似的文件和目录的权限模型。每个文件和目录有一个<em>所有者(owner)</em>和一个<em>组(group)</em>。文件或目录对其所有者、同组的其他用户以及所有其他用户分别有着不同的权限。对文件而言,当读取这个文件时需要有<em>r</em>权限,当写入或者追加到文件时需要有<em>w</em>权限。对目录而言,当列出目录内容时需要具有<em>r</em>权限,当新建或删除子文件或子目录时需要有<em>w</em>权限,当访问目录的子节点时需要有<em>x</em>权限。不同于POSIX模型,HDFS权限模型中的文件没有<em>sticky</em>,<em>setuid</em>或<em>setgid</em>位,因为这里没有可执行文件的概念。为了简单起见,这里也没有目录的<em>sticky</em>,<em>setuid</em>或<em>setgid</em>位。总的来说,文件或目录的权限就是它的<em>模式(mode)</em>。HDFS采用了Unix表示和显示模式的习惯,包括使用八进制数来表示权限。当新建一个文件或目录,它的所有者即客户进程的用户,它的所属组是父目录的组(BSD的规定)。
+	</p>
+<p>
+		每个访问HDFS的用户进程的标识分为两个部分,分别是<em>用户名</em>和<em>组名列表</em>。每次用户进程访问一个文件或目录<span class="codefrag">foo</span>,HDFS都要对其进行权限检查,
+	</p>
+<ul>
+		
+<li>
+		   如果用户即<span class="codefrag">foo</span>的所有者,则检查所有者的访问权限;
+		</li>
+		
+<li>
+		   如果<span class="codefrag">foo</span>关联的组在组名列表中出现,则检查组用户的访问权限;
+		</li>
+		
+<li>
+		   否则检查<span class="codefrag">foo</span>其他用户的访问权限。
+		</li>
+	
+</ul>
+<p>
+		如果权限检查失败,则客户的操作会失败。
+</p>
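+<p>下面用一段可运行的Java代码示意上面描述的检查顺序:先按所有者、再按组、最后按其他用户选取对应的权限位。代码仅为说明,并非HDFS的真实实现。</p>
+<pre class="code">
+import java.util.Arrays;
+import java.util.List;
+
+// 示意:按"所有者/组/其他"的顺序选取权限位进行检查;并非HDFS真实实现
+public class PermissionCheckSketch {
+    // mode为八进制权限(如0640);access为所需权限位:4=r,2=w,1=x
+    static boolean check(String user, List&lt;String&gt; groups,
+                         String owner, String group, int mode, int access) {
+        int bits;
+        if (user.equals(owner))          bits = (mode &gt;&gt; 6) &amp; 7; // 所有者权限
+        else if (groups.contains(group)) bits = (mode &gt;&gt; 3) &amp; 7; // 组权限
+        else                             bits = mode &amp; 7;        // 其他用户权限
+        return (bits &amp; access) == access;
+    }
+
+    public static void main(String[] args) {
+        List&lt;String&gt; groups = Arrays.asList("hadoop", "users");  // bob的组名列表
+        // 文件foo:owner=alice,group=hadoop,mode=0640;bob凭组权限可读不可写
+        System.out.println(check("bob", groups, "alice", "hadoop", 0640, 4)); // true
+        System.out.println(check("bob", groups, "alice", "hadoop", 0640, 2)); // false
+    }
+}
+</pre>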
+</div>
+
+
+<a name="N10065"></a><a name="%E7%94%A8%E6%88%B7%E8%BA%AB%E4%BB%BD"></a>
+<h2 class="h3">用户身份</h2>
+<div class="section">
+<p>
+在这个版本的Hadoop中,客户端用户身份是由宿主操作系统给出的。对类Unix系统来说,
+</p>
+<ul>
+
+<li>
+   用户名等于<span class="codefrag">`whoami`</span>;
+</li>
+
+<li>
+   组列表等于<span class="codefrag">`bash -c groups`</span>。
+</li>
+
+</ul>
+<p>
+将来会增加其他的方式来确定用户身份(比如Kerberos、LDAP等)。指望上文提到的第一种方式能防止一个用户假冒另一个用户是不现实的。这种用户身份识别机制结合权限模型,允许一个协作团体以一种有组织的形式共享文件系统中的资源。
+</p>
+<p>
+不管怎样,用户身份机制对HDFS本身来说只是外部特性。HDFS并不提供创建用户身份、创建组或处理用户凭证等功能。
+</p>
+</div>
+
+
+<a name="N10083"></a><a name="%E7%90%86%E8%A7%A3%E7%B3%BB%E7%BB%9F%E7%9A%84%E5%AE%9E%E7%8E%B0"></a>
+<h2 class="h3">理解系统的实现</h2>
+<div class="section">
+<p>
+	每次文件或目录操作都会把完整的路径名传递给name node,每一个操作都会对此路径做权限检查。客户端框架会隐式地将用户身份与到name node的连接关联起来,从而减少改变现有客户端API的需求。经常会有这种情况:当对一个文件的某一操作成功后,之后同样的操作却会失败,这是因为文件或路径上的某些目录已经不复存在了。比如,客户端首先开始读一个文件,它向name node发出一个请求以获取文件第一个数据块的位置,但接下去获取其他数据块的第二个请求可能会失败。另一方面,删除一个文件并不会撤销客户端已经获得的对文件数据块的访问权限。而权限管理能使得客户端对一个文件的访问许可在两次请求之间被收回。再次强调,权限的改变并不会撤销当前客户端对文件数据块的访问许可。
+</p>
+<p>
+map-reduce框架通过传递字符串来指派用户身份,没有做其他特别的安全方面的考虑。文件或目录的所有者和组属性是以字符串的形式保存,而不是像传统的Unix方式转换为用户和组的数字ID。
+</p>
+<p>
+这个发行版本的权限管理特性并不需要改变data node的任何行为。Data node上的数据块并没有关联任何<em>Hadoop</em>所有者或权限属性。
+</p>
+</div>
+     
+
+<a name="N10096"></a><a name="%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9FAPI%E5%8F%98%E6%9B%B4"></a>
+<h2 class="h3">文件系统API变更</h2>
+<div class="section">
+<p>
+	如果权限检查失败,所有使用一个路径参数的方法都可能抛出<span class="codefrag">AccessControlException</span>异常。
+</p>
+<p>新增方法:</p>
+<ul>
+	
+<li>
+		
+<span class="codefrag">public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException;</span>
+	
+</li>
+	
+<li>
+		
+<span class="codefrag">public boolean mkdirs(Path f, FsPermission permission) throws IOException;</span>
+	
+</li>
+	
+<li>
+		
+<span class="codefrag">public void setPermission(Path p, FsPermission permission) throws IOException;</span>
+	
+</li>
+	
+<li>
+		
+<span class="codefrag">public void setOwner(Path p, String username, String groupname) throws IOException;</span>
+	
+</li>
+	
+<li>
+		
+<span class="codefrag">public FileStatus getFileStatus(Path f) throws IOException;</span> 也会返回路径关联的所有者、组和模式属性。
+	</li>
+
+
+</ul>
+<p>
+新建文件或目录的模式受配置参数<span class="codefrag">umask</span>的约束。当使用之前的 <span class="codefrag">create(path, &hellip;)</span> 方法(<em>没有指定</em>权限参数)时,新文件的模式是<span class="codefrag">666&thinsp;&amp;&thinsp;^umask</span>。当使用新的 <span class="codefrag">create(path, </span><em>permission</em><span class="codefrag">, &hellip;)</span> 方法(<em>指定了</em>权限参数<em>P</em>)时,新文件的模式是<span class="codefrag">P&thinsp;&amp;&thinsp;^umask&thinsp;&amp;&thinsp;666</span>。当使用先前的 <span class="codefrag">mkdirs(path)</span> 方法(<em>没有指定</em> 权限参数)新建一个目录时,新目录的模式是<span class="codefrag">777&thinsp;&amp;&thinsp;^umask</span>。当使用新的 <span class="codefrag">mkdirs(path, </span><em>permission</em> <span class="codefrag">)</span> 方法(<em>指定了</em>权限参数<em>P</em>)新建一个目录时,新目录的模式是<span class="codefrag">P&thinsp;&amp;&thinsp;^umask&thinsp;&amp;&thinsp;777</span>。
+</p>
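+<p>下面的Java片段验证上述模式计算,以<span class="codefrag">umask</span>为022为例:未指定权限参数的create得到模式644,指定<em>P</em>=777时结果同样是644。</p>
+<pre class="code">
+import org.apache.hadoop.fs.permission.FsPermission;
+
+// 示意:验证"新文件模式 = P &amp; ~umask &amp; 0666"(数值均为八进制)
+public class UmaskExample {
+    public static void main(String[] args) {
+        int umask = 0022;                     // 即配置参数 dfs.umask = 022
+        int created = 0666 &amp; ~umask;          // 未指定权限的create:0666 &amp; ~022 = 0644
+        int withP = 0777 &amp; ~umask &amp; 0666;     // 指定P=0777的create:结果同样是0644
+        System.out.println(Integer.toOctalString(created));   // 644
+        System.out.println(Integer.toOctalString(withP));     // 644
+        // 对应的FsPermission对象可传给新的create/mkdirs重载
+        System.out.println(new FsPermission((short) withP));  // rw-r--r--
+    }
+}
+</pre>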
+</div>
+
+     
+
+<a name="N10100"></a><a name="Shell%E5%91%BD%E4%BB%A4%E5%8F%98%E6%9B%B4"></a>
+<h2 class="h3">Shell命令变更</h2>
+<div class="section">
+<p>新增操作:</p>
+<dl>
+	
+<dt>
+<span class="codefrag">chmod [-R]</span> <em>mode file &hellip;</em>
+</dt>
+	
+<dd>
+		只有文件的所有者或者超级用户才有权限改变文件模式。
+	</dd>
+	
+<dt>
+<span class="codefrag">chgrp [-R]</span> <em>group file &hellip;</em>
+</dt>
+	
+<dd>
+		使用<span class="codefrag">chgrp</span>命令的用户必须属于特定的组且是文件的所有者,或者用户是超级用户。
+	</dd>
+	
+<dt>
+<span class="codefrag">chown [-R]</span> <em>[owner][:[group]] file &hellip;</em>
+</dt>
+	
+<dd>
+		文件的所有者只能被超级用户更改。
+	</dd>
+	
+<dt>
+<span class="codefrag">ls </span> <em>file &hellip;</em>
+</dt>
+<dd></dd>
+	
+<dt>
+<span class="codefrag">lsr </span> <em>file &hellip;</em>
+</dt>
+	
+<dd>
+		输出格式做了调整以显示所有者、组和模式。
+	</dd>
+
+</dl>
+</div>
+
+     
+
+<a name="N1013F"></a><a name="%E8%B6%85%E7%BA%A7%E7%94%A8%E6%88%B7"></a>
+<h2 class="h3">超级用户</h2>
+<div class="section">
+<p>
+超级用户即运行name node进程的用户。宽泛地讲,如果你启动了name node,你就是超级用户。超级用户可以做任何事情,因为超级用户能够通过所有的权限检查。没有永久记号保留谁<em>过去</em>是超级用户;当name node开始运行时,进程自动判断谁<em>现在</em>是超级用户。HDFS的超级用户不一定非得是name node主机上的超级用户,也不要求所有集群的超级用户都是同一个用户。同样地,在个人工作站上运行HDFS的实验者,不需任何配置就已方便地成为了他的部署实例的超级用户。
+	</p>
+<p>
+	另外,管理员可以用配置参数指定一组特定的用户,如果做了设定,这个组的成员也会是超级用户。
+</p>
+</div>
+
+
+<a name="N10152"></a><a name="Web%E6%9C%8D%E5%8A%A1%E5%99%A8"></a>
+<h2 class="h3">Web服务器</h2>
+<div class="section">
+<p>
+Web服务器的身份是一个可配置参数。Name node并没有<em>真实</em>用户的概念,但是Web服务器表现得就像它具有管理员选定的用户的身份(用户名和组)一样。除非这个选定的身份是超级用户,否则名字空间中会有一部分对Web服务器来说不可见。
+</p>
+</div>
+
+
+<a name="N1015F"></a><a name="%E5%9C%A8%E7%BA%BF%E5%8D%87%E7%BA%A7"></a>
+<h2 class="h3">在线升级</h2>
+<div class="section">
+<p>
+如果集群在0.15版本的数据集(<span class="codefrag">fsimage</span>)上启动,所有的文件和目录都有所有者<em>O</em>,组<em>G</em>,和模式<em>M</em>,这里 <em>O</em> 和 <em>G</em> 分别是超级用户的用户标识和组名,<em>M</em>是一个配置参数。</p>
+</div>
+
+
+<a name="N1017E"></a><a name="%E9%85%8D%E7%BD%AE%E5%8F%82%E6%95%B0"></a>
+<h2 class="h3">配置参数</h2>
+<div class="section">
+<dl>
+	
+<dt>
+<span class="codefrag">dfs.permissions = true </span>
+</dt>
+	
+<dd>
+		如果是 <span class="codefrag">true</span>,则打开前文所述的权限系统。如果是 <span class="codefrag">false</span>,权限<em>检查</em> 就是关闭的,但是其他的行为没有改变。这个配置参数的改变并不改变文件或目录的模式、所有者和组等信息。
+		<p>
+		
+</p>
+		不管权限模式是开还是关,<span class="codefrag">chmod</span>,<span class="codefrag">chgrp</span> 和 <span class="codefrag">chown</span> <em>总是</em>会检查权限。这些命令只有在权限环境下才有意义,所以不存在兼容性问题。这样就能让管理员在打开常规的权限检查之前,可靠地设置文件的所有者和权限。
+	</dd>
+	
+<dt>
+<span class="codefrag">dfs.web.ugi = webuser,webgroup</span>
+</dt>
+	
+<dd>
+	Web服务器使用的用户名。如果将这个参数设置为超级用户的名称,则所有Web客户就可以看到所有的信息。如果将这个参数设置为一个不使用的用户,则Web客户就只能访问到&ldquo;other&rdquo;权限可访问的资源了。额外的组可以加在后面,形成一个用逗号分隔的列表。
+	</dd>
+	
+<dt>
+<span class="codefrag">dfs.permissions.supergroup = supergroup</span>
+</dt>
+	
+<dd>
+	超级用户的组名。
+	</dd>
+	
+<dt>
+<span class="codefrag">dfs.upgrade.permission = 777</span>
+</dt>
+	
+<dd>
+	升级时的初始模式。文件<em>永不会</em>被设置<em>x</em>权限。在配置文件中,可以使用十进制数<em>511<sub>10</sub></em>。
+	</dd>
+	
+<dt>
+<span class="codefrag">dfs.umask = 022</span>
+</dt>
+	
+<dd>
+		
+<span class="codefrag">umask</span>参数在创建文件和目录时使用。在配置文件中,可以使用十进制数<em>18<sub>10</sub></em>。
+	</dd>
+
+</dl>
+</div>
+
+     
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 129 - 0
docs/cn/hdfs_permissions_guide.pdf


+ 277 - 0
docs/cn/hdfs_quota_admin_guide.html

@@ -0,0 +1,277 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>
+      名字空间配额管理指南
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">HDFS配额管理指南</div>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hdfs_quota_admin_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>
+      名字空间配额管理指南
+    </h1>
+      
+<p>
+      Hadoop分布式文件系统(HDFS)允许管理员为每个目录设置配额。
+      新建立的目录没有配额。
+      最大的配额是<span class="codefrag">Long.Max_Value</span>。配额为1可以强制目录保持为空。
+      </p>
+
+      
+<p>
+      目录配额是对目录树上该目录下的名字数量做硬性限制。如果创建文件或目录时超过了配额,该操作会失败。重命名不会改变该目录的配额;如果重命名操作会导致违反配额限制,该操作将会失败。如果尝试设置一个配额而现有文件数量已经超出了这个新配额,则设置失败。
+      </p>
+
+      
+<p>
+      配额和fsimage保持一致。当启动时,如果fsimage违反了某个配额限制(也许fsimage被偷偷改变了),则启动失败并生成错误报告。设置或删除一个配额会创建相应的日志记录。
+      </p> 
+
+      
+<p>
+      下面的新命令或新选项是用于支持配额的。
+      前两个是管理员命令。
+      </p>
+
+      
+<ul>
+      
+<li>
+      
+<span class="codefrag">dfsadmin -setquota &lt;N&gt; &lt;directory&gt;...&lt;directory&gt;</span> 
+      
+<br> 
+      把每个目录配额设为<span class="codefrag">N</span>。这个命令会在每个目录上尝试,
+      如果<span class="codefrag">N</span>不是一个正的长整型数、目录不存在或者是一个文件、
+      或者设置后目录会立即超出新的配额,则会产生错误报告。
+      </li>
+  
+      
+<li>
+      
+<span class="codefrag">dfsadmin -clrquota &lt;directory&gt;...&lt;directory&gt;</span>
+<br> 
+      为每个目录删除配额。这个命令会在每个目录上尝试,如果目录不存在或者是文件,则会产生错误报告。如果目录原来没有设置配额,则不会报错。
+      </li>
+  
+      
+<li>
+      
+<span class="codefrag">fs -count -q &lt;directory&gt;...&lt;directory&gt;</span>
+<br>
+      使用<span class="codefrag">-q</span>选项,会报告每个目录设置的配额,以及剩余配额。
+      如果目录没有设置配额,会报告<span class="codefrag">none</span>和<span class="codefrag">inf</span>。
+      </li>
+      
+</ul>
+   
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 47 - 0
docs/cn/hdfs_quota_admin_guide.pdf


+ 860 - 0
docs/cn/hdfs_shell.html

@@ -0,0 +1,860 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop Shell命令</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">FS Shell使用指南</div>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hdfs_shell.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop Shell命令</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#FS+Shell"> FS Shell </a>
+<ul class="minitoc">
+<li>
+<a href="#cat"> cat </a>
+</li>
+<li>
+<a href="#chgrp"> chgrp </a>
+</li>
+<li>
+<a href="#chmod"> chmod </a>
+</li>
+<li>
+<a href="#chown"> chown </a>
+</li>
+<li>
+<a href="#copyFromLocal">copyFromLocal</a>
+</li>
+<li>
+<a href="#copyToLocal"> copyToLocal</a>
+</li>
+<li>
+<a href="#cp"> cp </a>
+</li>
+<li>
+<a href="#du">du</a>
+</li>
+<li>
+<a href="#dus"> dus </a>
+</li>
+<li>
+<a href="#expunge"> expunge </a>
+</li>
+<li>
+<a href="#get"> get </a>
+</li>
+<li>
+<a href="#getmerge"> getmerge </a>
+</li>
+<li>
+<a href="#ls"> ls </a>
+</li>
+<li>
+<a href="#lsr">lsr</a>
+</li>
+<li>
+<a href="#mkdir"> mkdir </a>
+</li>
+<li>
+<a href="#movefromLocal"> movefromLocal </a>
+</li>
+<li>
+<a href="#mv"> mv </a>
+</li>
+<li>
+<a href="#put"> put </a>
+</li>
+<li>
+<a href="#rm"> rm </a>
+</li>
+<li>
+<a href="#rmr"> rmr </a>
+</li>
+<li>
+<a href="#setrep"> setrep </a>
+</li>
+<li>
+<a href="#stat"> stat </a>
+</li>
+<li>
+<a href="#tail"> tail </a>
+</li>
+<li>
+<a href="#test"> test </a>
+</li>
+<li>
+<a href="#text"> text </a>
+</li>
+<li>
+<a href="#touchz"> touchz </a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+		
+<a name="N1000F"></a><a name="FS+Shell"></a>
+<h2 class="h3"> FS Shell </h2>
+<div class="section">
+<p>
+      调用文件系统(FS)Shell命令应使用
+      <span class="codefrag">bin/hadoop fs &lt;args&gt;</span>的形式。
+  所有的FS shell命令使用URI路径作为参数。URI格式是<em>scheme://authority/path</em>。对HDFS文件系统,scheme是<em>hdfs</em>;对本地文件系统,scheme是<em>file</em>。其中scheme和authority参数都是可选的,如果未加指定,就会使用配置中指定的默认scheme。一个HDFS文件或目录比如<em>/parent/child</em>可以表示成<em>hdfs://namenode:namenodeport/parent/child</em>,或者更简单的<em>/parent/child</em>(假设你配置文件中的默认值是<em>namenode:namenodeport</em>)。大多数FS Shell命令的行为和对应的Unix Shell命令类似,不同之处会在下面介绍各命令使用详情时指出。出错信息会输出到<em>stderr</em>,其他信息输出到<em>stdout</em>。
+  </p>
+<a name="N10036"></a><a name="cat"></a>
+<h3 class="h4"> cat </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -cat URI [URI &hellip;]</span>
+			
+</p>
+<p>
+		   将路径指定文件的内容输出到<em>stdout</em>。
+		   </p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -cat hdfs://host1:port1/file1 hdfs://host2:port2/file2 
+		   </span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag">hadoop fs -cat file:///file3 /user/hadoop/file4 </span>
+				
+</li>
+			
+</ul>
+<p>返回值:<br>
+		   
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+</p>
+<a name="N10068"></a><a name="chgrp"></a>
+<h3 class="h4"> chgrp </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -chgrp [-R] GROUP URI [URI &hellip;]</span>
+			</p>
+<p>
+	    改变文件所属的组。使用<span class="codefrag">-R</span>将使改变在目录结构下递归进行。命令的使用者必须是文件的所有者或者超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>。
+	    </p>
+<a name="N10086"></a><a name="chmod"></a>
+<h3 class="h4"> chmod </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -chmod [-R] &lt;MODE[,MODE]... | OCTALMODE&gt; URI [URI &hellip;]</span>
+			
+</p>
+<p>
+	    改变文件的权限。使用<span class="codefrag">-R</span>将使改变在目录结构下递归进行。命令的使用者必须是文件的所有者或者超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>。
+	    </p>
+<a name="N1009D"></a><a name="chown"></a>
+<h3 class="h4"> chown </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -chown [-R] [OWNER][:[GROUP]] URI [URI ]</span>
+			
+</p>
+<p>
+	    改变文件的拥有者。使用<span class="codefrag">-R</span>将使改变在目录结构下递归进行。命令的使用者必须是超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>。
+	    </p>
+<a name="N100B4"></a><a name="copyFromLocal"></a>
+<h3 class="h4">copyFromLocal</h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -copyFromLocal &lt;localsrc&gt; URI</span>
+			
+</p>
+<p>除了限定源路径是一个本地文件外,和<a href="#putlink"><strong>put</strong></a>命令相似。</p>
+<a name="N100C9"></a><a name="copyToLocal"></a>
+<h3 class="h4"> copyToLocal</h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -copyToLocal [-ignorecrc] [-crc] URI &lt;localdst&gt;</span>
+			
+</p>
+<p>除了限定目标路径是一个本地文件外,和<a href="#getlink"><strong>get</strong></a>命令类似。</p>
+<a name="N100DE"></a><a name="cp"></a>
+<h3 class="h4"> cp </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -cp URI [URI &hellip;] &lt;dest&gt;</span>
+			
+</p>
+<p>
+	    将文件从源路径复制到目标路径。这个命令允许有多个源路径,此时目标路径必须是一个目录。
+	    <br>
+	    示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2</span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir </span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N10108"></a><a name="du"></a>
+<h3 class="h4">du</h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -du URI [URI &hellip;]</span>
+			
+</p>
+<p>
+	     显示目录中所有文件的大小,或者当只指定一个文件时,显示此文件的大小。<br>
+	     示例:<br>
+<span class="codefrag">hadoop fs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://host:port/user/hadoop/dir1</span>
+<br>
+	     返回值:<br>
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+<br>
+</p>
+<a name="N10123"></a><a name="dus"></a>
+<h3 class="h4"> dus </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -dus &lt;args&gt;</span>
+			
+</p>
+<p>
+	   显示文件大小的汇总。
+	   </p>
+<a name="N10133"></a><a name="expunge"></a>
+<h3 class="h4"> expunge </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -expunge</span>
+			
+</p>
+<p>清空回收站。请参考<a href="hdfs_design.html">HDFS设计</a>文档以获取更多关于回收站特性的信息。
+	   </p>
+<a name="N10147"></a><a name="get"></a>
+<h3 class="h4"> get </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -get [-ignorecrc] [-crc] &lt;src&gt; &lt;localdst&gt;</span>
+				
+<br>
+			
+</p>
+<p>
+	   复制文件到本地文件系统。可用<span class="codefrag">-ignorecrc</span>选项复制CRC校验失败的文件。使用<span class="codefrag">-crc</span>选项复制文件以及CRC信息。
+	  		</p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -get /user/hadoop/file localfile </span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -get hdfs://host:port/user/hadoop/file localfile</span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N1017B"></a><a name="getmerge"></a>
+<h3 class="h4"> getmerge </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -getmerge &lt;src&gt; &lt;localdst&gt; [addnl]</span>
+			
+</p>
+<p>
+	  接受一个源目录和一个目标文件作为输入,并且将源目录中所有的文件连接成本地目标文件。<span class="codefrag">addnl</span>是可选的,用于指定在每个文件结尾添加一个换行符。 
+	  </p>
+<a name="N1018E"></a><a name="ls"></a>
+<h3 class="h4"> ls </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -ls &lt;args&gt;</span>
+			
+</p>
+<p>如果是文件,则按照如下格式返回文件信息:<br>
+<span class="codefrag">文件名 &lt;副本数&gt; 文件大小 修改日期 修改时间 权限 用户ID 组ID</span>
+<br>
+	         如果是目录,则返回它直接子文件的一个列表,就像在Unix中一样。目录返回列表的信息如下:<br>
+<span class="codefrag">目录名 &lt;dir&gt; 修改日期 修改时间 权限 用户ID 组ID</span>
+<br>
+	         示例:<br>
+<span class="codefrag">hadoop fs -ls /user/hadoop/file1 /user/hadoop/file2 hdfs://host:port/user/hadoop/dir1 /nonexistentfile</span>
+<br>
+	         返回值:<br>
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+<br>
+</p>
+<a name="N101B1"></a><a name="lsr"></a>
+<h3 class="h4">lsr</h3>
+<p>
+<span class="codefrag">使用方法:hadoop fs -lsr &lt;args&gt;</span>
+<br>
+	      
+<span class="codefrag">ls</span>命令的递归版本。类似于Unix中的<span class="codefrag">ls -R</span>。
+	      </p>
+<a name="N101C4"></a><a name="mkdir"></a>
+<h3 class="h4"> mkdir </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -mkdir &lt;paths&gt;</span>
+				
+<br>
+			
+</p>
+<p>接受路径指定的uri作为参数,创建这些目录。其行为类似于Unix的mkdir -p,它会创建路径中的各级父目录。</p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag">hadoop fs -mkdir /user/hadoop/dir1 /user/hadoop/dir2 </span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag">hadoop fs -mkdir hdfs://host1:port1/user/hadoop/dir hdfs://host2:port2/user/hadoop/dir
+	  </span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag">成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N101F1"></a><a name="movefromLocal"></a>
+<h3 class="h4"> movefromLocal </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -moveFromLocal &lt;src&gt; &lt;dst&gt;</span>
+			
+</p>
+<p>输出一个&ldquo;not implemented&rdquo;信息。
+	   </p>
+<a name="N10201"></a><a name="mv"></a>
+<h3 class="h4"> mv </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -mv URI [URI &hellip;] &lt;dest&gt;</span>
+			
+</p>
+<p>
+	    将文件从源路径移动到目标路径。这个命令允许有多个源路径,此时目标路径必须是一个目录。不允许在不同的文件系统间移动文件。
+	    <br>
+	    示例:
+	    </p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -mv /user/hadoop/file1 /user/hadoop/file2</span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -mv hdfs://host:port/file1 hdfs://host:port/file2 hdfs://host:port/file3 hdfs://host:port/dir1</span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N1022B"></a><a name="put"></a>
+<h3 class="h4"> put </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -put &lt;localsrc&gt; ... &lt;dst&gt;</span>
+			
+</p>
+<p>从本地文件系统中复制单个或多个源路径到目标文件系统。也支持从标准输入中读取输入写入目标文件系统。<br>
+	   
+</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -put localfile /user/hadoop/hadoopfile</span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -put localfile1 localfile2 /user/hadoop/hadoopdir</span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -put localfile hdfs://host:port/hadoop/hadoopfile</span>
+				
+</li>
+				
+<li>
+<span class="codefrag">hadoop fs -put - hdfs://host:port/hadoop/hadoopfile</span>
+<br>从标准输入中读取输入。</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N10262"></a><a name="rm"></a>
+<h3 class="h4"> rm </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -rm URI [URI &hellip;] </span>
+			
+</p>
+<p>
+	   删除指定的文件。只删除非空目录和文件。请参考rmr命令了解递归删除。<br>
+	   示例:
+	   </p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -rm hdfs://host:port/file /user/hadoop/emptydir </span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N10286"></a><a name="rmr"></a>
+<h3 class="h4"> rmr </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -rmr URI [URI &hellip;]</span>
+			
+</p>
+<p>delete的递归版本。<br>
+	   示例:
+	   </p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -rmr /user/hadoop/dir </span>
+				
+</li>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -rmr hdfs://host:port/user/hadoop/dir </span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N102B0"></a><a name="setrep"></a>
+<h3 class="h4"> setrep </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -setrep [-R] [-w] &lt;rep&gt; &lt;path&gt;</span>
+			
+</p>
+<p>
+	   改变一个文件的副本系数。-R选项用于递归改变目录下所有文件的副本系数。
+	  </p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -setrep -w 3 -R /user/hadoop/dir1 </span>
+				
+</li>
+			
+</ul>
+<p>返回值:</p>
+<p>
+				
+<span class="codefrag">成功返回0,失败返回-1。</span>
+			
+</p>
+<a name="N102D5"></a><a name="stat"></a>
+<h3 class="h4"> stat </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -stat URI [URI &hellip;]</span>
+			
+</p>
+<p>
+	   返回指定路径的统计信息。
+	   </p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -stat path </span>
+				
+</li>
+			
+</ul>
+<p>返回值:<br>
+	   
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+</p>
+<a name="N102F8"></a><a name="tail"></a>
+<h3 class="h4"> tail </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -tail [-f] URI </span>
+			
+</p>
+<p>
+	   将文件尾部1K字节的内容输出到stdout。支持-f选项,行为和Unix中一致。
+	   </p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -tail pathname </span>
+				
+</li>
+			
+</ul>
+<p>返回值:<br>
+	   
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+</p>
+<a name="N1031B"></a><a name="test"></a>
+<h3 class="h4"> test </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -test -[ezd] URI</span>
+			
+</p>
+<p>
+	   选项:<br>
+	   -e 检查文件是否存在。如果存在则返回0。<br>
+	   -z 检查文件是否是0字节。如果是则返回0。 <br>
+	   -d 如果路径是个目录,则返回1,否则返回0。<br>
+</p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -test -e filename </span>
+				
+</li>
+			
+</ul>
+<a name="N1033E"></a><a name="text"></a>
+<h3 class="h4"> text </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -text &lt;src&gt;</span>
+				
+<br>
+			
+</p>
+<p>
+	   将源文件输出为文本格式。允许的格式是zip和TextRecordInputStream。
+	  </p>
+<a name="N10350"></a><a name="touchz"></a>
+<h3 class="h4"> touchz </h3>
+<p>
+				
+<span class="codefrag">使用方法:hadoop fs -touchz URI [URI &hellip;]</span>
+				
+<br>
+			
+</p>
+<p>
+	   创建一个0字节的空文件。
+	   </p>
+<p>示例:</p>
+<ul>
+				
+<li>
+					
+<span class="codefrag"> hadoop fs -touchz pathname </span>
+				
+</li>
+			
+</ul>
+<p>返回值:<br>
+	   
+<span class="codefrag"> 成功返回0,失败返回-1。</span>
+</p>
+</div>
+	
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 347 - 0
docs/cn/hdfs_shell.pdf


+ 718 - 0
docs/cn/hdfs_user_guide.html

@@ -0,0 +1,718 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>
+      Hadoop分布式文件系统使用指南
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">HDFS使用指南</div>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hdfs_user_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>
+      Hadoop分布式文件系统使用指南
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%9B%AE%E7%9A%84">目的</a>
+</li>
+<li>
+<a href="#%E6%A6%82%E8%BF%B0"> 概述 </a>
+</li>
+<li>
+<a href="#%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6"> 先决条件 </a>
+</li>
+<li>
+<a href="#Web%E6%8E%A5%E5%8F%A3"> Web接口 </a>
+</li>
+<li>
+<a href="#Shell%E5%91%BD%E4%BB%A4">Shell命令</a>
+<ul class="minitoc">
+<li>
+<a href="#DFSAdmin%E5%91%BD%E4%BB%A4"> DFSAdmin命令 </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Secondary+NameNode"> Secondary NameNode </a>
+</li>
+<li>
+<a href="#Rebalancer"> Rebalancer </a>
+</li>
+<li>
+<a href="#%E6%9C%BA%E6%9E%B6%E6%84%9F%E7%9F%A5%EF%BC%88Rack+awareness%EF%BC%89"> 机架感知(Rack awareness) </a>
+</li>
+<li>
+<a href="#%E5%AE%89%E5%85%A8%E6%A8%A1%E5%BC%8F"> 安全模式 </a>
+</li>
+<li>
+<a href="#fsck"> fsck </a>
+</li>
+<li>
+<a href="#%E5%8D%87%E7%BA%A7%E5%92%8C%E5%9B%9E%E6%BB%9A"> 升级和回滚 </a>
+</li>
+<li>
+<a href="#%E6%96%87%E4%BB%B6%E6%9D%83%E9%99%90%E5%92%8C%E5%AE%89%E5%85%A8%E6%80%A7"> 文件权限和安全性 </a>
+</li>
+<li>
+<a href="#%E5%8F%AF%E6%89%A9%E5%B1%95%E6%80%A7"> 可扩展性 </a>
+</li>
+<li>
+<a href="#%E7%9B%B8%E5%85%B3%E6%96%87%E6%A1%A3"> 相关文档 </a>
+</li>
+</ul>
+</div>
+    
+<a name="N1000F"></a><a name="%E7%9B%AE%E7%9A%84"></a>
+<h2 class="h3">目的</h2>
+<div class="section">
+<p>
+	本文档的目标是为Hadoop分布式文件系统(HDFS)的用户提供一个学习的起点,这里的HDFS既可以作为<a href="http://hadoop.apache.org/">Hadoop</a>集群的一部分,也可以作为一个独立的通用分布式文件系统。虽然HDFS的设计目标是在多种环境下都能"正常工作",但是了解HDFS的工作原理,对在特定集群上改进配置以及进行错误诊断都有极大的帮助。
+      </p>
+</div>
+
+    
+<a name="N10021"></a><a name="%E6%A6%82%E8%BF%B0"></a>
+<h2 class="h3"> 概述 </h2>
+<div class="section">
+<p>
+HDFS是Hadoop应用用到的一个最主要的分布式存储系统。一个HDFS集群主要由一个NameNode和很多个Datanode组成:Namenode管理文件系统的元数据,而Datanode存储了实际的数据。HDFS的体系结构在<a href="hdfs_design.html">这里</a>有详细的描述。本文档主要关注用户以及管理员怎样和HDFS进行交互。<a href="hdfs_design.html">HDFS架构设计</a>中的<a href="images/hdfsarchitecture.gif">图解</a>描述了Namenode、Datanode和客户端之间的基本的交互操作。客户端联系Namenode以获取文件的元数据或进行文件的修改,而真正的文件I/O操作是直接和Datanode进行交互的。
+      </p>
+<p>
+      下面列出了一些多数用户都比较感兴趣的重要特性。
+      </p>
+<ul>
+    
+<li>
+    Hadoop(包括HDFS)非常适合在商用硬件(commodity hardware)上做分布式存储和计算,因为它不仅具有容错性和可扩展性,而且非常易于扩展。<a href="mapred_tutorial.html">Map-Reduce</a>框架以其在大型分布式应用上的简单性和适用性而著称,这个框架已经被集成进Hadoop中。
+    </li>
+    
+<li>
+    	HDFS的可配置性极高,同时,它的默认配置能够满足很多的安装环境。多数情况下,这些参数只在非常大规模的集群环境下才需要调整。
+    </li>
+    
+<li>
+    	用Java语言开发,支持所有的主流平台。
+    </li>
+    
+<li>
+    	支持类Shell命令,可直接和HDFS进行交互。
+    </li>
+    
+<li>
+    	NameNode和DataNode有内置的Web服务器,方便用户检查集群的当前状态。
+    </li>
+    
+<li>
+	新特性和改进会定期加入HDFS的实现中。下面列出的是HDFS中常用特性的一部分:
+      <ul>
+    	
+<li>
+    		文件权限和授权。
+    	</li>
+    	
+<li>
+    		机架感知(Rack awareness):在调度任务和分配存储空间时考虑节点的物理位置。
+    	</li>
+    	
+<li>
+    		安全模式:一种维护需要的管理模式。
+    	</li>
+    	
+<li>
+    		fsck:一个诊断文件系统健康状况的工具,能够发现丢失的文件或数据块。
+    	</li>
+    	
+<li>
+    		Rebalancer:当datanode之间数据不均衡时,平衡集群上的数据负载。
+    	</li>
+    	
+<li>
+    		升级和回滚:在软件更新后有异常发生的情形下,能够回滚到HDFS升级之前的状态。
+    	</li>
+    	
+<li>
+		Secondary Namenode:对文件系统名字空间执行周期性的检查点,将Namenode上HDFS改动日志文件的大小控制在某个特定的限度下。
+    	</li>
+      
+</ul>
+    
+</li>
+    
+</ul>
+</div> 
+<a name="N10071"></a><a name="%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6"></a>
+<h2 class="h3"> 先决条件 </h2>
+<div class="section">
+<p>
+    下面的文档描述了如何安装和搭建Hadoop集群:
+    </p>
+<ul>
+ 	
+<li>
+ 		
+<a href="quickstart.html">Hadoop快速入门</a>
+ 		针对初次使用者。
+ 	</li>
+ 	
+<li>
+		
+<a href="cluster_setup.html">Hadoop集群搭建</a>
+ 		针对大规模分布式集群的搭建。
+ 	</li>
+    
+</ul>
+<p>
+    文档余下部分假设用户已经安装并运行了至少包含一个Datanode节点的HDFS。就本文目的来说,Namenode和Datanode可以运行在同一个物理主机上。
+    </p>
+</div> 
+<a name="N1008F"></a><a name="Web%E6%8E%A5%E5%8F%A3"></a>
+<h2 class="h3"> Web接口 </h2>
+<div class="section">
+<p>
+ 	NameNode和DataNode各自启动了一个内置的Web服务器,显示了集群当前的基本状态和信息。在默认配置下NameNode的首页地址是<span class="codefrag">http://namenode-name:50070/</span>。这个页面列出了集群里的所有DataNode和集群的基本状态。这个Web接口也可以用来浏览整个文件系统(使用NameNode首页上的"Browse the file system"链接)。
+ </p>
+</div> 
+<a name="N100A2"></a><a name="Shell%E5%91%BD%E4%BB%A4"></a>
+<h2 class="h3">Shell命令</h2>
+<div class="section">
+<p>Hadoop包括一系列的类shell的命令,可直接和HDFS以及其他Hadoop支持的文件系统进行交互。<span class="codefrag">bin/hadoop fs -help</span> 命令列出所有Hadoop Shell支持的命令。而 <span class="codefrag">bin/hadoop fs -help command-name</span> 命令能显示关于某个命令的详细信息。这些命令支持大多数普通文件系统的操作,比如复制文件、改变文件权限等。它还支持一些HDFS特有的操作,比如改变文件副本数目。
+     </p>
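+<p>下面用几条命令演示这类操作的典型用法(示例中的路径和文件名,如<span class="codefrag">/user/foo</span>,仅为演示而假设):</p>
+<pre class="code">
+# 列出HDFS上某个目录的内容
+bin/hadoop fs -ls /user/foo
+# 把本地文件复制到HDFS
+bin/hadoop fs -put localfile.txt /user/foo/localfile.txt
+# 改变文件权限
+bin/hadoop fs -chmod 644 /user/foo/localfile.txt
+# 改变文件副本数目(HDFS特有的操作),-w表示等待复制完成
+bin/hadoop fs -setrep -w 3 /user/foo/localfile.txt
+</pre>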
+<a name="N100B3"></a><a name="DFSAdmin%E5%91%BD%E4%BB%A4"></a>
+<h3 class="h4"> DFSAdmin命令 </h3>
+<p>
+   	
+<span class="codefrag">'bin/hadoop dfsadmin'</span> 命令支持一些和HDFS管理相关的操作。<span class="codefrag">bin/hadoop dfsadmin -help</span> 命令能列出所有当前支持的命令。比如:
+   </p>
+<ul>
+   	
+<li>
+   	    
+<span class="codefrag">-report</span>:报告HDFS的基本统计信息。有些信息也可以在NameNode Web服务首页看到。
+   	</li>
+   	
+<li>
+   	    
+<span class="codefrag">-safemode</span>:虽然通常并不需要,但是管理员的确可以手动让NameNode进入或离开安全模式。
+   	</li>
+   	
+<li>
+   	    
+<span class="codefrag">-finalizeUpgrade</span>:删除上一次升级时制作的集群备份。
+   	</li>
+   	
+</ul>
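+<p>作为示例,下面的命令序列演示了上述选项的基本用法(各命令的实际输出因集群而异,此处从略):</p>
+<pre class="code">
+# 列出dfsadmin当前支持的所有命令
+bin/hadoop dfsadmin -help
+# 报告HDFS的基本统计信息
+bin/hadoop dfsadmin -report
+# 删除上一次升级时制作的集群备份
+bin/hadoop dfsadmin -finalizeUpgrade
+</pre>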
+</div> 
+<a name="N100DD"></a><a name="Secondary+NameNode"></a>
+<h2 class="h3"> Secondary NameNode </h2>
+<div class="section">
+<p>NameNode将对文件系统的改动追加保存到本地文件系统上的一个日志文件(<span class="codefrag">edits</span>)。当一个NameNode启动时,它首先从一个映像文件(<span class="codefrag">fsimage</span>)中读取HDFS的状态,接着应用日志文件中的edits操作。然后它将新的HDFS状态写入<span class="codefrag">fsimage</span>中,并使用一个空的edits文件开始正常操作。因为NameNode只有在启动阶段才合并<span class="codefrag">fsimage</span>和<span class="codefrag">edits</span>,所以久而久之日志文件可能会变得非常庞大,特别是对大型的集群。日志文件太大的另一个副作用是下一次NameNode启动会花很长时间。
+   </p>
+<p>
+     Secondary NameNode定期合并fsimage和edits日志,将edits日志文件大小控制在一个限度下。因为它的内存需求和NameNode在同一个数量级上,所以通常Secondary NameNode和NameNode运行在不同的机器上。Secondary NameNode通过<span class="codefrag">bin/start-dfs.sh</span>在<span class="codefrag">conf/masters</span>中指定的节点上启动。
+   </p>
+<p>
+Secondary NameNode的检查点进程启动,是由两个配置参数控制的:
+</p>
+<ul>
+      
+<li>
+        
+<span class="codefrag">fs.checkpoint.period</span>,指定连续两次检查点的最大时间间隔,
+        默认值是1小时。
+      </li>
+      
+<li>
+        
+<span class="codefrag">fs.checkpoint.size</span>定义了edits日志文件的最大值,一旦超过这个值会导致强制执行检查点(即使没到检查点的最大时间间隔)。默认值是64MB。
+      </li>
+   
+</ul>
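+<p>这两个参数可以在<span class="codefrag">conf/hadoop-site.xml</span>中覆盖,下面是一个示例片段(取值即为上述默认值,单位分别是秒和字节,仅为演示):</p>
+<pre class="code">
+&lt;property&gt;
+  &lt;name&gt;fs.checkpoint.period&lt;/name&gt;
+  &lt;value&gt;3600&lt;/value&gt;
+&lt;/property&gt;
+&lt;property&gt;
+  &lt;name&gt;fs.checkpoint.size&lt;/name&gt;
+  &lt;value&gt;67108864&lt;/value&gt;
+&lt;/property&gt;
+</pre>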
+<p>
+     Secondary NameNode保存最新检查点的目录与NameNode的目录结构相同。
+     所以NameNode可以在需要的时候读取Secondary NameNode上的检查点镜像。
+   </p>
+<p>
+     如果NameNode上除了最新的检查点以外,所有的其他的历史镜像和edits文件都丢失了,
+     NameNode可以引入这个最新的检查点。以下操作可以实现这个功能:
+   </p>
+<ul>
+      
+<li>
+        在配置参数<span class="codefrag">dfs.name.dir</span>指定的位置建立一个空文件夹;
+      </li>
+      
+<li>
+        把检查点目录的位置赋值给配置参数<span class="codefrag">fs.checkpoint.dir</span>;
+      </li>
+      
+<li>
+        启动NameNode,并加上<span class="codefrag">-importCheckpoint</span>。 
+      </li>
+   
+</ul>
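+<p>下面的命令序列勾勒了上述步骤(其中<span class="codefrag">/data/dfs/name</span>只是一个假设的示例路径,实际取值必须与配置文件中的<span class="codefrag">dfs.name.dir</span>一致):</p>
+<pre class="code">
+# 在dfs.name.dir指定的位置建立一个空文件夹
+mkdir -p /data/dfs/name
+# 确认fs.checkpoint.dir已指向检查点目录之后,
+# 带-importCheckpoint选项启动NameNode
+bin/hadoop namenode -importCheckpoint
+</pre>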
+<p>
+     NameNode会从<span class="codefrag">fs.checkpoint.dir</span>目录读取检查点,
+     并把它保存在<span class="codefrag">dfs.name.dir</span>目录下。
+     如果<span class="codefrag">dfs.name.dir</span>目录下有合法的镜像文件,NameNode会启动失败。
+     NameNode会检查<span class="codefrag">fs.checkpoint.dir</span>目录下镜像文件的一致性,但是不会去改动它。
+   </p>
+<p>
+     命令的使用方法请参考<a href="commands_manual.html#secondarynamenode"><span class="codefrag">secondarynamenode</span> 命令</a>。
+   </p>
+</div> 
+<a name="N10148"></a><a name="Rebalancer"></a>
+<h2 class="h3"> Rebalancer </h2>
+<div class="section">
+<p>
+      HDFS的数据也许并不是非常均匀地分布在各个DataNode中。一个常见的原因是在现有的集群上经常会增添新的DataNode节点。当新增一个数据块(一个文件的数据被保存在一系列的块中)时,NameNode在选择DataNode接收这个数据块之前,会考虑到很多因素。其中的一些考虑因素是:
+    </p>
+<ul>
+      
+<li>
+	将数据块的一个副本放在正在写这个数据块的节点上。
+      </li>
+      
+<li>
+        尽量将数据块的不同副本分布在不同的机架上,这样集群可在完全失去某一机架的情况下还能存活。
+      </li>
+      
+<li>
+        一个副本通常被放置在和写文件的节点同一机架的某个节点上,这样可以减少跨越机架的网络I/O。
+      </li>
+      
+<li>
+        尽量均匀地将HDFS数据分布在集群的DataNode中。
+      </li>
+      
+</ul>
+<p>
+由于上述多种考虑需要取舍,数据可能并不会均匀分布在DataNode中。HDFS为管理员提供了一个工具,用于分析数据块分布和重新平衡DataNode上的数据分布。<a href="http://issues.apache.org/jira/browse/HADOOP-1652">HADOOP-1652</a>的附件中的一个<a href="http://issues.apache.org/jira/secure/attachment/12368261/RebalanceDesign6.pdf">PDF</a>是一个简要的rebalancer管理员指南。
+    </p>
+<p>
+     使用方法请参考<a href="commands_manual.html#balancer">balancer 命令</a>。
+   </p>
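+<p>例如,下面的命令以10%的磁盘使用率偏差作为阈值来运行rebalancer(阈值取值仅为演示):</p>
+<pre class="code">
+# 平衡集群数据,直到各DataNode的使用率与集群平均值相差不超过10%
+bin/hadoop balancer -threshold 10
+</pre>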
+</div> 
+<a name="N1017B"></a><a name="%E6%9C%BA%E6%9E%B6%E6%84%9F%E7%9F%A5%EF%BC%88Rack+awareness%EF%BC%89"></a>
+<h2 class="h3"> 机架感知(Rack awareness) </h2>
+<div class="section">
+<p>
+      通常,大型Hadoop集群是以机架的形式来组织的,同一个机架上不同节点间的网络状况比不同机架之间的更为理想。另外,NameNode设法将数据块副本保存在不同的机架上以提高容错性。Hadoop允许集群的管理员通过配置<span class="codefrag">dfs.network.script</span>参数来确定节点所处的机架。当这个脚本配置完毕,每个节点都会运行这个脚本来获取它的机架ID。默认的安装假定所有的节点属于同一个机架。这个特性及其配置参数在<a href="http://issues.apache.org/jira/browse/HADOOP-692">HADOOP-692</a>所附的<a href="http://issues.apache.org/jira/secure/attachment/12345251/Rack_aware_HDFS_proposal.pdf">PDF</a>上有更详细的描述。
+    </p>
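+<p>下面是一个示意性的配置片段和脚本草稿,用来说明"每个节点运行脚本、输出自己的机架ID"这一约定(脚本路径和内容纯属假设,实际脚本需要根据站点自身的网络拓扑编写):</p>
+<pre class="code">
+&lt;property&gt;
+  &lt;name&gt;dfs.network.script&lt;/name&gt;
+  &lt;value&gt;/path/to/print-rack-id.sh&lt;/value&gt;
+&lt;/property&gt;
+</pre>
+<pre class="code">
+#!/bin/sh
+# print-rack-id.sh(假设的示例):按本机主机名推断并输出机架ID
+case `hostname` in
+  node0*) echo /rack0 ;;
+  node1*) echo /rack1 ;;
+  *)      echo /default-rack ;;
+esac
+</pre>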
+</div> 
+<a name="N10190"></a><a name="%E5%AE%89%E5%85%A8%E6%A8%A1%E5%BC%8F"></a>
+<h2 class="h3"> 安全模式 </h2>
+<div class="section">
+<p>
+     NameNode启动时会从fsimage和edits日志文件中装载文件系统的状态信息,接着它等待各个DataNode向它报告它们各自的数据块状态,这样,NameNode就不会过早地开始复制数据块,即使在副本充足的情况下。这个阶段,NameNode处于安全模式下。NameNode的安全模式本质上是HDFS集群的一种只读模式,此时集群不允许任何对文件系统或者数据块修改的操作。通常NameNode会在开始阶段自动地退出安全模式。如果需要,你也可以通过<span class="codefrag">'bin/hadoop dfsadmin -safemode'</span>命令显式地将HDFS置于安全模式。NameNode首页会显示当前是否处于安全模式。关于安全模式的更多介绍和配置信息请参考JavaDoc:<a href="http://hadoop.apache.org/core/docs/current/api/org/apache/hadoop/dfs/NameNode.html#setSafeMode(org.apache.hadoop.dfs.FSConstants.SafeModeAction)"><span class="codefrag">setSafeMode()</span></a>。
+    </p>
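+<p>下面演示了该命令的几个子命令:</p>
+<pre class="code">
+# 查询当前是否处于安全模式
+bin/hadoop dfsadmin -safemode get
+# 手动进入安全模式
+bin/hadoop dfsadmin -safemode enter
+# 手动离开安全模式
+bin/hadoop dfsadmin -safemode leave
+</pre>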
+</div> 
+<a name="N101A2"></a><a name="fsck"></a>
+<h2 class="h3"> fsck </h2>
+<div class="section">
+<p>    
+      HDFS支持<span class="codefrag">fsck</span>命令来检查系统中的各种不一致状况。这个命令被设计来报告各种文件存在的问题,比如文件缺少数据块或者副本数目不够。不同于在本地文件系统上传统的fsck工具,这个命令并不会修正它检测到的错误。一般来说,NameNode会自动修正大多数可恢复的错误。HDFS的fsck不是一个Hadoop shell命令。它通过'<span class="codefrag">bin/hadoop fsck</span>'执行。
+命令的使用方法请参考<a href="commands_manual.html#fsck"><span class="codefrag">fsck</span>命令</a>。
+<span class="codefrag">fsck</span>可用来检查整个文件系统,也可以只检查部分文件。
+     </p>
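+<p>例如,下面的命令检查整个文件系统,并列出文件、数据块及块所在的位置(把<span class="codefrag">/</span>换成某个子目录即可只检查部分文件):</p>
+<pre class="code">
+bin/hadoop fsck / -files -blocks -locations
+</pre>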
+</div> 
+<a name="N101BF"></a><a name="%E5%8D%87%E7%BA%A7%E5%92%8C%E5%9B%9E%E6%BB%9A"></a>
+<h2 class="h3"> 升级和回滚 </h2>
+<div class="section">
+<p>当在一个已有集群上升级Hadoop时,像其他的软件升级一样,可能会有新的bug或一些会影响到现有应用的非兼容性变更出现。在任何有实际意义的HDFS系统上,丢失数据是不被允许的,更不用说重新搭建启动HDFS了。HDFS允许管理员退回到之前的Hadoop版本,并将集群的状态回滚到升级之前。更多关于HDFS升级的细节在<a href="http://wiki.apache.org/hadoop/Hadoop%20Upgrade">升级wiki</a>上可以找到。HDFS在同一时刻只能保留一个这样的备份。在升级之前,管理员需要用<span class="codefrag">bin/hadoop dfsadmin -finalizeUpgrade</span>(升级终结操作)命令删除存在的备份文件。下面简单介绍一下一般的升级过程:
+     </p>
+<ul>
+      
+<li>升级 Hadoop 软件之前,请检查是否已经存在一个备份,如果存在,可执行升级终结操作删除这个备份。通过<span class="codefrag">dfsadmin -upgradeProgress status</span>命令能够知道是否需要对一个集群执行升级终结操作。</li>
+      
+<li>停止集群并部署新版本的Hadoop。</li>
+      
+<li>使用<span class="codefrag">-upgrade</span>选项运行新的版本(<span class="codefrag">bin/start-dfs.sh -upgrade</span>)。
+      </li>
+      
+<li>在大多数情况下,集群都能够正常运行。一旦我们认为新的HDFS运行正常(也许经过几天的操作之后),就可以对之执行升级终结操作。注意,在对一个集群执行升级终结操作之前,删除那些升级前就已经存在的文件并不会真正地释放DataNodes上的磁盘空间。</li>
+      
+<li>如果需要退回到老版本,
+	<ul>
+          
+<li>停止集群并且部署老版本的Hadoop。</li>
+          
+<li>用回滚选项启动集群(<span class="codefrag">bin/start-dfs.sh -rollback</span>)。</li>
+        
+</ul>
+      
+</li>
+      
+</ul>
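+<p>把上面的步骤串起来,一次典型的升级会话大致如下(仅为流程示意,各命令需在相应的部署版本下执行):</p>
+<pre class="code">
+# 1. 检查是否存在尚未终结的升级备份,如有必要先执行升级终结操作
+bin/hadoop dfsadmin -upgradeProgress status
+bin/hadoop dfsadmin -finalizeUpgrade
+# 2. 停止集群,部署新版本的Hadoop
+bin/stop-dfs.sh
+# 3. 用-upgrade选项启动新版本
+bin/start-dfs.sh -upgrade
+# 4. 运行一段时间确认无误后,对本次升级执行终结操作
+bin/hadoop dfsadmin -finalizeUpgrade
+</pre>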
+</div> 
+<a name="N101F7"></a><a name="%E6%96%87%E4%BB%B6%E6%9D%83%E9%99%90%E5%92%8C%E5%AE%89%E5%85%A8%E6%80%A7"></a>
+<h2 class="h3"> 文件权限和安全性 </h2>
+<div class="section">
+<p>           
+      这里的文件权限和其他常见平台如Linux的文件权限类似。目前,安全性仅限于简单的文件权限。启动NameNode的用户被视为HDFS的超级用户。HDFS以后的版本将会支持网络验证协议(比如Kerberos)来对用户身份进行验证和对数据进行加密传输。具体的细节请参考<a href="hdfs_permissions_guide.html">权限使用管理指南</a>。
+     </p>
+</div> 
+<a name="N10205"></a><a name="%E5%8F%AF%E6%89%A9%E5%B1%95%E6%80%A7"></a>
+<h2 class="h3"> 可扩展性 </h2>
+<div class="section">
+<p>
+      现在,Hadoop已经运行在上千个节点的集群上。<a href="http://wiki.apache.org/hadoop/PoweredBy">Powered By Hadoop</a>页面列出了一些已将Hadoop部署在他们的大型集群上的组织。HDFS集群只有一个NameNode节点。目前,NameNode上可用内存大小是一个主要的扩展限制。在超大型的集群中,增大HDFS存储文件的平均大小能够增大集群的规模,而不需要增加NameNode的内存。默认配置也许并不适合超大规模的集群。<a href="http://wiki.apache.org/hadoop/FAQ">Hadoop FAQ</a>页面列举了针对大型Hadoop集群的配置改进。</p>
+</div> 
+<a name="N10217"></a><a name="%E7%9B%B8%E5%85%B3%E6%96%87%E6%A1%A3"></a>
+<h2 class="h3"> 相关文档 </h2>
+<div class="section">
+<p>
+      这个用户手册给用户提供了一个学习和使用HDFS文件系统的起点。本文档会不断地进行改进,同时,用户也可以参考更多的Hadoop和HDFS文档。下面的列表是用户继续学习的起点:
+      </p>
+<ul>
+      
+<li>
+        
+<a href="http://hadoop.apache.org/">Hadoop官方主页</a>:所有Hadoop相关的起始页。
+      </li>
+      
+<li>
+        
+<a href="http://wiki.apache.org/hadoop/FrontPage">Hadoop Wiki</a>:Hadoop Wiki文档首页。这个指南是Hadoop代码树中的一部分,与此不同,Hadoop Wiki是由Hadoop社区定期编辑的。
+      </li>
+      
+<li>Hadoop Wiki上的<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>。
+      </li>
+      
+<li>Hadoop <a href="http://hadoop.apache.org/core/docs/current/api/">JavaDoc API</a>。</li>
+      
+<li>Hadoop用户邮件列表:<a href="mailto:core-user@hadoop.apache.org">core-user[at]hadoop.apache.org</a>。</li>
+      
+<li>查看<span class="codefrag">conf/hadoop-default.xml</span>文件。这里包括了大多数配置参数的简要描述。</li>
+      
+<li>
+        
+<a href="commands_manual.html">命令手册</a>:命令使用说明。
+      </li>
+      
+</ul>
+</div>
+     
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 195 - 0
docs/cn/hdfs_user_guide.pdf


+ 257 - 0
docs/cn/hod.html

@@ -0,0 +1,257 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> 
+      Hadoop On Demand
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Hadoop On Demand</div>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> 
+      Hadoop On Demand
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%AE%80%E4%BB%8B">简介</a>
+</li>
+<li>
+<a href="#%E6%96%87%E6%A1%A3">文档</a>
+</li>
+</ul>
+</div>
+  
+<a name="N1000D"></a><a name="%E7%AE%80%E4%BB%8B"></a>
+<h2 class="h3">简介</h2>
+<div class="section">
+<p>Hadoop On Demand(HOD)是一个能在大型物理集群上供应虚拟hadoop集群的系统。它使用Torque资源管理器分配节点。它可以在分配的节点上启动Hadoop Map/Reduce和HDFS的守护进程。它会自动为Hadoop守护进程和客户端生成合适的配置文件(hadoop-site.xml)。HOD还可以将Hadoop分发到它分配出来的虚拟Hadoop集群的节点上。简而言之,HOD使管理员和用户轻松地快速搭建和使用hadoop。它也是Hadoop开发人员和测试人员非常有用的一个工具,他们可以使用HOD共享一个物理集群来测试各自的Hadoop版本。</p>
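+<p>例如,在配置好HOD之后,用户通常用类似下面的一条命令就能分配到一个供应好Hadoop的虚拟集群(集群目录<span class="codefrag">~/hod-clusters/test</span>为假设的示例路径,节点数按需指定):</p>
+<pre class="code">
+# 分配一个5节点的集群;hadoop-site.xml会生成到指定的集群目录下
+hod allocate -d ~/hod-clusters/test -n 5
+</pre>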
+</div>
+      
+<a name="N10017"></a><a name="%E6%96%87%E6%A1%A3"></a>
+<h2 class="h3">文档</h2>
+<div class="section">
+<p>阅读下面的文档,可以进一步了解如何使用HOD:</p>
+<ul>
+        
+<li>
+<a href="hod_admin_guide.html">HOD管理指南</a> : 此指南概述了HOD的体系结构,Torque资源管理器及其他各种支持工具,也会告诉你如何安装,配置和运行HOD。</li>
+        
+<li>
+<a href="hod_config_guide.html">HOD配置指南</a> : 此指南讨论HOD的配置段,会告诉你如何使用那些最重要和最常用的配置项。</li>
+        
+<li>
+<a href="hod_user_guide.html">HOD用户指南</a> : 此指南会告诉你如何开始使用HOD,它的各种功能特性,命令行选项,也会给你一些故障解决方面的详细帮助。</li>
+      
+</ul>
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 144 - 0
docs/cn/hod.pdf


+ 557 - 0
docs/cn/hod_admin_guide.html

@@ -0,0 +1,557 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> 
+      Hadoop On Demand
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">文档</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod_admin_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> 
+      Hadoop On Demand
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E6%A6%82%E8%BF%B0">概述</a>
+</li>
+<li>
+<a href="#%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6">先决条件</a>
+</li>
+<li>
+<a href="#%E8%B5%84%E6%BA%90%E7%AE%A1%E7%90%86%E5%99%A8">资源管理器</a>
+</li>
+<li>
+<a href="#%E5%AE%89%E8%A3%85HOD">安装HOD</a>
+</li>
+<li>
+<a href="#%E9%85%8D%E7%BD%AEHOD">配置HOD</a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%9C%80%E5%B0%8F%E9%85%8D%E7%BD%AE">最小配置</a>
+</li>
+<li>
+<a href="#%E9%AB%98%E7%BA%A7%E9%85%8D%E7%BD%AE">高级配置</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E8%BF%90%E8%A1%8CHOD">运行HOD</a>
+</li>
+<li>
+<a href="#%E6%94%AF%E6%8C%81%E5%B7%A5%E5%85%B7%E5%92%8C%E5%AE%9E%E7%94%A8%E7%A8%8B%E5%BA%8F">支持工具和实用程序</a>
+<ul class="minitoc">
+<li>
+<a href="#logcondense.py+-+%E7%AE%A1%E7%90%86%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6">logcondense.py - 管理日志文件</a>
+<ul class="minitoc">
+<li>
+<a href="#%E8%BF%90%E8%A1%8Clogcondense.py">运行logcondense.py</a>
+</li>
+<li>
+<a href="#logcondense.py%E7%9A%84%E5%91%BD%E4%BB%A4%E8%A1%8C%E9%80%89%E9%A1%B9">logcondense.py的命令行选项</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#checklimits.sh+-+%E7%9B%91%E8%A7%86%E8%B5%84%E6%BA%90%E9%99%90%E5%88%B6">checklimits.sh - 监视资源限制</a>
+<ul class="minitoc">
+<li>
+<a href="#%E8%BF%90%E8%A1%8Cchecklimits.sh">运行checklimits.sh</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#verify-account+-+%E7%94%A8%E4%BA%8E%E6%A0%B8%E5%AE%9E%E7%94%A8%E6%88%B7%E6%8F%90%E4%BA%A4%E4%BD%9C%E4%B8%9A%E6%89%80%E4%BD%BF%E7%94%A8%E7%9A%84%E5%B8%90%E5%8F%B7%E7%9A%84%E8%84%9A%E6%9C%AC">verify-account - 用于核实用户提交作业所使用的帐号的脚本</a>
+<ul class="minitoc">
+<li>
+<a href="#%E5%9C%A8HOD%E4%B8%AD%E9%9B%86%E6%88%90verify-account">在HOD中集成verify-account</a>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+
+<a name="N1000C"></a><a name="%E6%A6%82%E8%BF%B0"></a>
+<h2 class="h3">概述</h2>
+<div class="section">
+<p>Hadoop On Demand (HOD)是一个能在一个共享集群上供应和管理相互独立的Hadoop Map/Reduce和Hadoop分布式文件系统(HDFS)实例的系统。它能让管理员和用户轻松地快速搭建和使用hadoop。HOD对Hadoop的开发人员和测试人员也非常有用,他们可以通过HOD共享一个物理集群来测试各自不同的Hadoop版本。</p>
+<p>HOD依赖资源管理器(RM)来分配节点,并在这些节点上运行hadoop实例。目前,HOD采用的是<a href="http://www.clusterresources.com/pages/products/torque-resource-manager.php">Torque资源管理器</a>。
+</p>
+<p>
+基本的HOD系统架构包含下列组件:</p>
+<ul>
+  
+<li>一个资源管理器(可能同时附带一个调度程序)</li>
+  
+<li>各种HOD的组件 </li>
+  
+<li>Hadoop Map/Reduce和HDFS守护进程</li>
+
+</ul>
+<p>
+通过与以上组件交互,HOD在给定的集群上供应和维护Hadoop Map/Reduce实例,或者HDFS实例。集群中的节点可看作由两组节点构成:</p>
+<ul>
+  
+<li>提交节点(Submit nodes):用户通过HOD客户端在这些节点上申请集群,之后通过Hadoop客户端提交Hadoop作业。</li>
+  
+<li>计算节点(Compute nodes):利用资源管理器,HOD组件在这些节点上运行以供应Hadoop守护进程。之后,Hadoop作业在这些节点上运行。</li>
+
+</ul>
+<p>
+下面是对申请集群及在之上运行作业所需操作步骤的简要描述。
+</p>
+<ul>
+  
+<li>用户在提交节点上用HOD客户端分配所需数目节点的集群,在上面供应Hadoop。</li>
+  
+<li>HOD客户端利用资源管理器接口(在Torque中是qsub)提交一个被称为RingMaster的HOD进程作为一个资源管理器作业,申请理想数目的节点。这个作业被提交到资源管理器的中央服务器上(在Torque中叫pbs_server)。</li>
+  
+<li>在计算节点上,资源管理器的从(slave)守护程序(Torque中的pbs_moms)接受并处理中央服务器(Torque中的pbs_server)分配的作业。RingMaster进程在其中一个计算节点(Torque中的mother superior)上开始运行。</li>
+  
+<li>之后,Ringmaster通过资源管理器的另外一个接口(在Torque中是pbsdsh)在所有分配到的计算节点上运行第二个HOD组件HodRing,即分布式任务。</li>
+  
+<li>HodRing初始化之后会与RingMaster通信获取Hadoop指令,并遵照执行。一旦Hadoop的命令开始启动,它们会向RingMaster登记,提供关于守护进程的信息。</li>
+  
+<li>Hadoop实例所需的配置文件全部由HOD自己生成,有一些来自于用户在配置文件设置的选项。</li>
+  
+<li>HOD客户端保持和RingMaster的通信,找出JobTracker和HDFS守护进程的位置所在。</li>
+
+</ul>
+<p>之后的文档会讲述如何在一个物理集群的节点上安装HOD。</p>
+</div>
+
+
+<a name="N10056"></a><a name="%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6"></a>
+<h2 class="h3">先决条件</h2>
+<div class="section">
+<p>要使用HOD,你的系统应包含下列的硬件和软件</p>
+<p>操作系统: HOD目前在RHEL4上测试通过。<br>
+节点:HOD至少需要3个由资源管理器配置的节点。<br>
+</p>
+<p>软件</p>
+<p>在使用HOD之前,以下组件必须被安装到所有节点上:</p>
+<ul>
+ 
+<li>Torque:资源管理器</li>
+ 
+<li>
+<a href="http://www.python.org">Python</a>:HOD要求Python 2.5.1</li>
+
+</ul>
+<p>下列组件是可选的,你可以安装以获取HOD更好的功能:</p>
+<ul>
+ 
+<li>
+<a href="http://twistedmatrix.com/trac/">Twisted Python</a>:这个可以用来提升HOD的可扩展性。如果检测到这个模块已安装,HOD就用它,否则就使用默认的模块。</li>
+
+ 
+<li>
+<a href="http://hadoop.apache.org/core/">Hadoop</a>:HOD能自动将Hadoop分发到集群的所有节点上。不过,如果Hadoop在所有节点上已经可用,HOD也可以使用已经安装好的Hadoop。HOD目前支持Hadoop 0.15和其后续版本。</li>
+
+</ul>
+<p>注意:HOD的配置要求以上这些组件的安装位置在集群所有节点上保持一致。如果在提交节点上的安装位置也相同,配置起来会更简单。</p>
+</div>
+
+
+<a name="N1008D"></a><a name="%E8%B5%84%E6%BA%90%E7%AE%A1%E7%90%86%E5%99%A8"></a>
+<h2 class="h3">资源管理器</h2>
+<div class="section">
+<p>目前,HOD使用Torque资源管理器来分配节点和提交作业。Torque是一个开源的资源管理器,来自于<a href="http://www.clusterresources.com">Cluster Resources</a>,是一个社区基于PBS项目努力的结晶。它提供对批处理作业和分散的计算节点(Compute nodes)的控制。你可以自由地从<a href="http://www.clusterresources.com/downloads/torque/">此处</a>下载Torque。</p>
+<p>所有torque相关的文档可以在<a href="http://www.clusterresources.com/pages/resources/documentation.php">这儿</a>的TORQUE Resource Manager一节找到。在<a href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki">这里</a>可以看到wiki文档。如果想订阅TORQUE的邮件列表或查看问题存档,访问<a href="http://www.clusterresources.com/pages/resources/mailing-lists.php">这里</a>。</p>
+<p>使用带Torque的HOD:</p>
+<ul>
+
+<li>安装Torque组件:在一个节点上(head node)安装pbs_server,所有计算节点上安装pbs_mom,所有计算节点和提交节点上安装PBS客户端。至少做最基本的配置,使Torque系统跑起来,也就是,使pbs_server能知道该和哪些机器通话。查看<a href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.2_basic_configuration">这里</a>可以了解基本配置。要了解高级配置,请查看<a href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.3_advanced_configuration">这里</a>。</li>
+ 
+<li>在pbs_server上创建一个作业提交队列。队列的名字和HOD的配置参数resource-manager.queue相同。Hod客户端利用此队列提交RingMaster进程作为Torque作业。</li>
+ 
+<li>在集群的所有节点上指定一个cluster name作为property。这可以用qmgr命令做到。比如:<span class="codefrag">qmgr -c "set node node properties=cluster-name"</span>。集群名字和HOD的配置参数hod.cluster是相同的。</li>
+ 
+<li>确保作业可以提交到节点上去。这可以通过使用qsub命令做到。比如:<span class="codefrag">echo "sleep 30" | qsub -l nodes=3</span>
+</li>
+
+</ul>
+</div>
+
+
+<a name="N100CD"></a><a name="%E5%AE%89%E8%A3%85HOD"></a>
+<h2 class="h3">安装HOD</h2>
+<div class="section">
+<p>现在资源管理器已经安装好了,我们接着下载并安装HOD。</p>
+<ul>
+ 
+<li>如果你想从Hadoop tar包中获取HOD,它在'contrib'下的'hod'的根目录下。</li>
+ 
+<li>如果你是从源码编译,可以在Hadoop根目录下运行ant tar,生成Hadoop tar包,然后按照上面的方法从tar包中获取HOD。</li>
+ 
+<li>把这个目录下的所有文件分发到集群的所有节点上。注意文件拷贝的位置应在所有节点上保持一致。</li>
+ 
+<li>注意,编译hadoop时会创建HOD,同时会正确地设置所有HOD必需的脚本文件的权限。</li>
+
+</ul>
+</div>
+
+
+<a name="N100E6"></a><a name="%E9%85%8D%E7%BD%AEHOD"></a>
+<h2 class="h3">配置HOD</h2>
+<div class="section">
+<p>安装HOD后你就可以配置它。为了运行HOD需要做的最小配置会在下面讲述,更多高级的配置会在HOD配置指南里面讲解。</p>
+<a name="N100EF"></a><a name="%E6%9C%80%E5%B0%8F%E9%85%8D%E7%BD%AE"></a>
+<h3 class="h4">最小配置</h3>
+<p>为运行HOD,以下的最小配置是必须要做的:</p>
+<ul>
+ 
+<li>在你想要运行hod的节点上,编辑&lt;install dir&gt;/conf目录下的hodrc文件。这个文件包含了运行hod所必需的最少量的设置。</li>
+ 
+<li>
+
+<p>为这个配置文件中的定义的变量指定适合你环境的值。注意,有些变量在文件中出现了不止一次。</p>
+
+  
+<ul>
+   
+<li>${JAVA_HOME}:Hadoop的Java的安装位置。Hadoop支持Sun JDK 1.5.x及以上版本。</li>
+   
+<li>${CLUSTER_NAME}:集群名称,由'node property'指定,在资源管理器配置中曾提到过。</li>
+   
+<li>${HADOOP_HOME}:Hadoop在计算节点和提交节点上的安装位置。</li>
+   
+<li>${RM_QUEUE}:在资源管理器配置中设置的作业提交队列。</li>
+   
+<li>${RM_HOME}:资源管理器在计算节点和提交节点的安装位置。</li>
+    
+</ul>
+
+</li>
+
+
+<li>
+
+<p>以下环境变量可能需要设置,取决于你的系统环境。在你运行HOD客户端的地方这些变量必须被定义,也必须在HOD配置文件中通过设定resource_manager.env-vars的值指定。多个变量可指定为用逗号分隔的key=value对组成的列表。</p>
+
+<ul>
+   
+<li>HOD_PYTHON_HOME:如果python安装在计算节点或提交节点的非默认位置,那么这个值必须设定为python的可执行文件的实际位置。</li>
+
+</ul>
+
+</li>
+
+</ul>
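+<p>下面是一个骨架式的hodrc片段,示意上述变量大致的填写位置(各段、各项的取值,如路径、队列名和集群名,均为假设,需替换成你环境中的实际值;完整的段和选项说明请参考配置指南):</p>
+<pre class="code">
+[hod]
+# ${JAVA_HOME},示例路径
+java-home = /usr/java/jdk1.5.0
+# ${CLUSTER_NAME},与资源管理器中设置的node property一致
+cluster = my-cluster
+
+[resource_manager]
+# ${RM_QUEUE},资源管理器中配置的作业提交队列
+queue = hod-queue
+# ${RM_HOME},示例路径
+batch-home = /usr/torque
+# 视环境需要设置,比如python不在默认位置时
+env-vars = HOD_PYTHON_HOME=/usr/local/bin/python
+
+[gridservice-mapred]
+# ${HADOOP_HOME},示例路径
+pkgs = /opt/hadoop
+
+[gridservice-hdfs]
+# ${HADOOP_HOME},示例路径
+pkgs = /opt/hadoop
+</pre>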
+<a name="N10123"></a><a name="%E9%AB%98%E7%BA%A7%E9%85%8D%E7%BD%AE"></a>
+<h3 class="h4">高级配置</h3>
+<p>你可以检查和修改其它配置选项来满足你的特定需要。关于HOD配置的更多信息,请参考<a href="hod_config_guide.html">配置指南</a>。</p>
+</div>
+  
+<a name="N10132"></a><a name="%E8%BF%90%E8%A1%8CHOD"></a>
+<h2 class="h3">运行HOD</h2>
+<div class="section">
+<p>当HOD配置好后,你就可以运行它了。更多信息请参考<a href="hod_user_guide.html">HOD用户指南</a>。</p>
+</div>
+
+  
+<a name="N10140"></a><a name="%E6%94%AF%E6%8C%81%E5%B7%A5%E5%85%B7%E5%92%8C%E5%AE%9E%E7%94%A8%E7%A8%8B%E5%BA%8F"></a>
+<h2 class="h3">支持工具和实用程序</h2>
+<div class="section">
+<p>此节描述一些可用于管理HOD部署的支持工具和应用程序。</p>
+<a name="N10149"></a><a name="logcondense.py+-+%E7%AE%A1%E7%90%86%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6"></a>
+<h3 class="h4">logcondense.py - 管理日志文件</h3>
+<p>在<a href="hod_user_guide.html#%E6%94%B6%E9%9B%86%E5%92%8C%E6%9F%A5%E7%9C%8BHadoop%E6%97%A5%E5%BF%97">HOD用户指南</a>有提到,HOD可配置成将Hadoop日志上传到一个配置好的静态HDFS上。随着时间增加,日志数量会不断增长。logcondense.py可以帮助管理员清理上传到HDFS的日志文件。</p>
+<a name="N10156"></a><a name="%E8%BF%90%E8%A1%8Clogcondense.py"></a>
+<h4>运行logcondense.py</h4>
+<p>logcondense.py在hod_install_location/support文件夹下。你可以使用python去运行它,比如<em>python logcondense.py</em>,或者授以执行权限,直接运行<em>logcondense.py</em>。如果启用了HDFS的权限功能,logcondense.py需要由具有足够权限、能够删除HDFS上日志上传目录下文件的用户来运行。比如,在<a href="hod_config_guide.html#3.7+hodring%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">配置指南</a>中提及过,用户可以配置将日志放在HDFS上的其主目录下。在这种情况下,你需要具有超级用户权限,才能运行logcondense.py删除所有用户主目录下的日志文件。</p>
+<a name="N1016A"></a><a name="logcondense.py%E7%9A%84%E5%91%BD%E4%BB%A4%E8%A1%8C%E9%80%89%E9%A1%B9"></a>
+<h4>logcondense.py的命令行选项</h4>
+<p>logcondense.py支持以下命令行选项</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+            
+<tr>
+              
+<td colspan="1" rowspan="1">短选项</td>
+              <td colspan="1" rowspan="1">长选项</td>
+              <td colspan="1" rowspan="1">含义</td>
+              <td colspan="1" rowspan="1">例子</td>
+            
+</tr>
+            
+<tr>
+              
+<td colspan="1" rowspan="1">-p</td>
+              <td colspan="1" rowspan="1">--package</td>
+              <td colspan="1" rowspan="1">hadoop脚本的全路径。Hadoop的版本必须和运行HDFS的版本一致。</td>
+              <td colspan="1" rowspan="1">/usr/bin/hadoop</td>
+            
+</tr>
+            
+<tr>
+              
+<td colspan="1" rowspan="1">-d</td>
+              <td colspan="1" rowspan="1">--days</td>
+              <td colspan="1" rowspan="1">删除超过指定天数的日志文件</td>
+              <td colspan="1" rowspan="1">7</td>
+            
+</tr>
+            
+<tr>
+              
+<td colspan="1" rowspan="1">-c</td>
+              <td colspan="1" rowspan="1">--config</td>
+              <td colspan="1" rowspan="1">Hadoop配置目录的路径,hadoop-site.xml存在于此目录中。hadoop-site.xml中须指明待删除日志存放的HDFS的NameNode。</td>
+              <td colspan="1" rowspan="1">/home/foo/hadoop/conf</td>
+            
+</tr>
+            
+<tr>
+              
+<td colspan="1" rowspan="1">-l</td>
+              <td colspan="1" rowspan="1">--logs</td>
+              <td colspan="1" rowspan="1">一个HDFS路径,须和log-destination-uri指定的是同一个HDFS路径,不带hdfs:// URI串,这点在<a href="hod_config_guide.html#3.7+hodring%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">配置指南</a>中提到过。</td>
+              <td colspan="1" rowspan="1">/user</td>
+            
+</tr>
+            
+<tr>
+              
+<td colspan="1" rowspan="1">-n</td>
+              <td colspan="1" rowspan="1">--dynamicdfs</td>
+            <td colspan="1" rowspan="1">如果为true,logcondense.py除要删除Map/Reduce日志之外还需删除HDFS日志。否则,它只删除Map/Reduce日志,这也是不指定这个选项时的默认行为。这个选项对下面的情况非常有用:一个动态的HDFS由HOD供应,一个静态的HDFS用来收集日志文件 - 也许这是测试集群中一个非常普遍的使用场景。</td>
+              <td colspan="1" rowspan="1">false</td>
+            
+</tr>
+          
+</table>
+<p>比如,假如要删除所有7天之前的日志文件,hadoop-site.xml存放在~/hadoop-conf下,hadoop安装于~/hadoop-0.17.0,你可以这样:</p>
+<p>
+<em>python logcondense.py -p ~/hadoop-0.17.0/bin/hadoop -d 7 -c ~/hadoop-conf -l /user</em>
+</p>
+<a name="N1020D"></a><a name="checklimits.sh+-+%E7%9B%91%E8%A7%86%E8%B5%84%E6%BA%90%E9%99%90%E5%88%B6"></a>
+<h3 class="h4">checklimits.sh - 监视资源限制</h3>
+<p>checklimits.sh是一个针对Torque/Maui环境的HOD工具(<a href="http://www.clusterresources.com/pages/products/maui-cluster-scheduler.php">Maui集群调度器</a> 是一个用于集群和超级计算机的开源作业调度器,来自Cluster Resources)。当新提交的作业违反或超过用户在Maui调度器里设置的限制时,checklimits.sh脚本更新torque的comment字段。它使用qstat在torque的job-list中做一次遍历确定作业是在队列中还是已完成,运行Maui工具checkjob检查每一个作业是否违反用户限制设定,之后运行torque的qalter工具更新作业的'comment'的属性。当前,它把那些违反限制的作业的comment的值更新为<em>User-limits exceeded. Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)</em>。之后,HOD根据这个注释内容做出相应处理。
+      </p>
+<a name="N1021D"></a><a name="%E8%BF%90%E8%A1%8Cchecklimits.sh"></a>
+<h4>运行checklimits.sh</h4>
+<p>checklimits.sh可以在hod_install_location/support目录下找到。在被授予执行权限后,这个shell脚本可以直接通过<em>sh checklimits.sh</em>或者<em>./checklimits.sh</em>运行。这个工具运行的机器上应有Torque和Maui的二进制运行文件,并且这些文件要在这个shell脚本进程的路径中。为了更新不同用户作业的comment值,这个工具必须以torque的管理员权限运行。这个工具必须按照一定的时间间隔重复运行(比如通过cron),以保证作业的comment及时反映限制状态。请注意,这个脚本中用到的资源管理器和调度器命令运行代价可能会比较大,所以最好不要在没有sleep的紧凑循环中运行。
        </p>
+        </p>
+<a name="N1022E"></a><a name="verify-account+-+%E7%94%A8%E4%BA%8E%E6%A0%B8%E5%AE%9E%E7%94%A8%E6%88%B7%E6%8F%90%E4%BA%A4%E4%BD%9C%E4%B8%9A%E6%89%80%E4%BD%BF%E7%94%A8%E7%9A%84%E5%B8%90%E5%8F%B7%E7%9A%84%E8%84%9A%E6%9C%AC"></a>
+<h3 class="h4">verify-account - 用于核实用户提交作业所使用的帐号的脚本</h3>
+<p>生产系统一般使用帐号系统来对使用共享资源的用户收费。HOD支持一个叫<em>resource_manager.pbs-account</em>的参数,用户可以通过这个参数来指定提交作业时使用的帐号。核实这个帐户在帐号管理系统中的有效性是有必要的。脚本<em>hod-install-dir/bin/verify-account</em>提供了一种机制让用户插入自定义脚本来实现这个核实过程。</p>
+<a name="N1023D"></a><a name="%E5%9C%A8HOD%E4%B8%AD%E9%9B%86%E6%88%90verify-account"></a>
+<h4>在HOD中集成verify-account</h4>
+<p>在分配集群之前,HOD运行<em>verify-account</em>脚本,将<em>resource_manager.pbs-account</em>的值作为参数传递给用户自定义脚本来完成用户的确认。站点可以通过这种方式接入自己的帐号系统。若该用户脚本中的返回值非0,就会导致HOD分配集群失败。并且在发生错误时,HOD还会将脚本中产生的错误信息打印出来。通过这种方式,任何描述性的错误信息都可以从用户脚本中返回给用户。
+      </p>
+<p>HOD自带的默认脚本不做任何用户核实,直接返回0。</p>
+<p>如果HOD没有找到上面提到的verify-account脚本,HOD就会认为该用户核实的功能被关闭,然后继续自己以后的分配工作。</p>
+</div>
+
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were suppressed because the diff is too large
+ 162 - 0
docs/cn/hod_admin_guide.pdf


+ 422 - 0
docs/cn/hod_config_guide.html

@@ -0,0 +1,422 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> 
+      Hadoop On Demand:配置指南
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">文档</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod_config_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> 
+      Hadoop On Demand:配置指南
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#1.+%E7%AE%80%E4%BB%8B">1. 简介</a>
+</li>
+<li>
+<a href="#2.+%E6%AE%B5">2. 段</a>
+</li>
+<li>
+<a href="#3.+HOD%E9%85%8D%E7%BD%AE%E9%A1%B9">3. HOD配置项</a>
+<ul class="minitoc">
+<li>
+<a href="#3.1+%E4%B8%80%E8%88%AC%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.1 一般的配置项</a>
+</li>
+<li>
+<a href="#3.2+hod%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.2 hod的配置项</a>
+</li>
+<li>
+<a href="#3.3+resouce_manager%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.3 resouce_manager的配置项</a>
+</li>
+<li>
+<a href="#3.4+ringmaster%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.4 ringmaster的配置项</a>
+</li>
+<li>
+<a href="#3.5+gridservice-hdfs%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.5 gridservice-hdfs的配置项</a>
+</li>
+<li>
+<a href="#3.6+gridservice-mapred%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.6 gridservice-mapred的配置项</a>
+</li>
+<li>
+<a href="#3.7+hodring%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">3.7 hodring的配置项</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+    
+<a name="N1000C"></a><a name="1.+%E7%AE%80%E4%BB%8B"></a>
+<h2 class="h3">1. 简介</h2>
+<div class="section">
+<p>
+      这个文档讲述了一些最重要和常用的Hadoop On Demand(HOD)的配置项。
+      这些配置项可通过两种方式指定:INI风格的配置文件,以及以--section.option[=value]格式指定的HOD命令行选项。如果两个地方都指定了同一个选项,命令行中的值会覆盖配置文件中的值。
+	</p>
+<p>
+	你可以通过以下命令获得所有配置项的简要描述:
+      </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod --verbose-help</span></td>
+</tr>
+</table>
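+<p>下面的例子演示用命令行选项临时覆盖配置文件中的取值(这里假设hodrc中hod段的debug值较低,该调用将其临时提升到4,集群目录和节点数仅为示例):</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod --hod.debug=4 allocate -d ~/hod-clusters/test -n 5</span></td>
+</tr>
+</table>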
+</div>
+    
+<a name="N10021"></a><a name="2.+%E6%AE%B5"></a>
+<h2 class="h3">2. 段</h2>
+<div class="section">
+<p>HOD配置文件分成以下几个配置段:</p>
+<ul>
+        
+<li>  hod:                 HOD客户端的配置项</li>
+        
+<li>  resource_manager:    指定要使用的资源管理器的配置项,以及使用该资源管理器时需要的一些其他参数。</li>
+        
+<li>  ringmaster:          RingMaster进程的配置项</li>
+        
+<li>  hodring:             HodRing进程的配置项</li>
+        
+<li>  gridservice-mapred:  Map/Reduce守护进程的配置项</li>
+        
+<li>  gridservice-hdfs:    HDFS守护进程的配置项</li>
+      
+</ul>
+</div>
+     
+<a name="N10040"></a><a name="3.+HOD%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h2 class="h3">3. HOD配置项</h2>
+<div class="section">
+<p>
+      接下来的一节会先描述大部分HOD配置段中通用的一些配置项,再描述各配置段特有的配置项。
+      </p>
+<a name="N10049"></a><a name="3.1+%E4%B8%80%E8%88%AC%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.1 一般的配置项</h3>
+<p>某些配置项会在HOD配置中的多个段定义。在一个段中定义的配置项,会被该段所适用的所有进程使用。这些配置项意义相同,但在不同的段中可以有不同的取值。</p>
+<ul>
+          
+<li>temp-dir: HOD进程使用的临时目录。请确保运行hod的用户有权限在这个指定的目录下创建子目录。如果想在每次分配的时候都使用不同的临时目录,可以使用环境变量,资源管理器会让这些环境变量对HOD进程可用。例如,在Torque环境下,设置--ringmaster.temp-dir=/tmp/hod-temp-dir.$PBS_JOBID会让ringmaster在每一次申请时使用不同的临时目录;Torque会在ringmaster启动前展开这个环境变量。
+          </li>
+          
+<li>debug:数值类型,取值范围是1-4。4会产生最多的log信息。</li>
+          
+<li>log-dir:日志文件的存放目录。缺省值是&lt;install-location&gt;/logs/。temp-dir变量的限制和注意事项在这里同样适用。
+          </li>
+          
+<li>xrs-port-range:端口范围,会在这之中挑选一个可用端口用于运行XML-RPC服务。</li>
+          
+<li>http-port-range:端口范围,会在这之中挑选一个可用端口用于运行HTTP服务。</li>
+          
+<li>java-home:给Hadoop使用的Java的位置。</li>
+          
+<li>syslog-address:syslog守护进程要绑定的地址。格式为host:port。如果配置了这个选项,HOD日志信息会被记录到这个位置的syslog。</li>
+        
+</ul>
+<a name="N1006B"></a><a name="3.2+hod%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.2 hod的配置项</h3>
+<ul>
+          
+<li>cluster:集群的描述性名称。对于Torque,这个值被指定为集群中所有节点的'Node property'。HOD使用这个值计算可用节点的个数。</li>
+          
+<li>client-params:逗号分割的hadoop配置参数列表,其中的每一项都是一个key-value对。在提交节点上会据此产生一个hadoop-site.xml,用于运行Map/Reduce作业。</li>
+          
+          
+<li>job-feasibility-attr: 正则表达式,用于指定是否和如何检查作业的可行性 - 资源管理器限制或调度限制。目前是通过torque作业的'comment'属性实现的,缺省情况下没有开启这个功能。设置了这个配置项后,HOD会使用它来确定哪些种类的限制是启用的,以及请求超出限制或者累积超出限制时是回收集群还是留在排队状态。torque comment属性可以被某个外部机制周期性地更新。比如,comment属性被hod/support目录下的<a href="hod_admin_guide.html#checklimits.sh+-+%E8%B5%84%E6%BA%90%E9%99%90%E5%88%B6%E7%9B%91%E8%A7%86%E5%99%A8">checklimits.sh</a>更新,这样设置job-feasibility-attr的值等于TORQUE_USER_LIMITS_COMMENT_FIELD, "User-limits exceeded. Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)"会使HOD产生相应的行为。</li>
+         
+</ul>
+<a name="N10082"></a><a name="3.3+resouce_manager%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.3 resouce_manager的配置项</h3>
+<ul>
+          
+<li>queue:资源管理器中配置的队列名,作业会被提交到这里。</li>
+          
+          
+<li>batch-home:安装目录,其下的'bin'中有资源管理器的可执行文件。</li>
+          
+<li>env-vars:逗号分隔的key-value对的列表,形式是key=value,它会被传递给运行在计算节点的作业。例如,如果python没有安装在常规位置,用户可以通过设置环境变量'HOD_PYTHON_HOME'指定python可执行文件的路径。之后,在计算节点运行的HOD进程就可以使用这个变量了。本小节末尾附有一个示意片段。</li>
+        
+</ul>
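+<p>承接上文env-vars一项的例子,一个假想的resource_manager段可能如下(队列名、Torque安装路径与python路径仅为示意):</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">[resource_manager]</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">queue = batch</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">batch-home = /usr/local/torque</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">env-vars = HOD_PYTHON_HOME=/usr/local/bin/python</td>
+</tr>
+</table>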
+<a name="N10095"></a><a name="3.4+ringmaster%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.4 ringmaster的配置项</h3>
+<ul>
+          
+<li>work-dirs:逗号分隔的路径列表,这些路径将作为根目录,HOD会在其下创建用于存放DFS和Map/Reduce数据的目录并传递给Hadoop。例如,DFS数据块就存放在这些路径下。一般情况下,有多少块磁盘就指定多少条路径,以确保所有的磁盘都被利用到。temp-dir变量的限制和注意事项在这儿同样适用。</li>
+          
+<li>max-master-failures:hadoop主守护进程启动前可以失败的次数,超出这个次数后,HOD会让这次集群分配失败。在HOD集群中,有时候由于某些问题,比如机器没安装java,没有安装Hadoop,或者Hadoop版本错误等,会存在一个或几个&ldquo;坏&rdquo;节点。当这个配置项被设为正整数时,只有当hadoop master(JobTracker或者NameNode)在上述的坏节点上,由于上面提到的种种原因启动失败的次数超过设定的值时,RingMaster才会把错误返回给客户端。如果尝试启动的次数没有超过设定值,当下一个HodRing请求运行一个命令时,同一个hadoop master会指定给这个HodRing。这样,即使集群中存在一些坏的节点,HOD也会尽全力使这次分配成功。
+                       </li>
+ 
+        
+</ul>
+<a name="N100A5"></a><a name="3.5+gridservice-hdfs%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.5 gridservice-hdfs的配置项</h3>
+<ul>
+          
+<li>external:如果被置为false,HOD必须在通过allocate命令分配的节点上自己创建HDFS集群。注意,在这种情况下,如果集群被回收,HDFS集群会停止,所有数据会丢失。如果被置为true,它会尝试连接外部已配置的HDFS系统。通常,因为在作业运行之前作业的输入需要被放置在HDFS上,并且作业的输出需要持久保留,在生产环境中一个内部的HDFS集群意义不大。</li>
+          
+          
+<li>host:外部配置好的NameNode的主机名。</li>
+          
+          
+<li>fs_port:NameNode RPC服务绑定的端口。</li>
+          
+          
+<li>info_port:NameNode web UI服务绑定的端口。</li>
+          
+          
+<li>pkgs:安装目录,其下有bin/hadoop可执行文件。可用来使用集群上预先安装的Hadoop版本。</li>
+          
+          
+<li>server-params:一个逗号分割的hadoop配置参数列表,每一项为key-value对形式。这些将用于产生被NameNode和DataNode使用到的hadoop-site.xml文件。</li>
+          
+          
+<li>final-server-params:除会被标记为final外和上面相同。</li>
+        
+</ul>
+<a name="N100C4"></a><a name="3.6+gridservice-mapred%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.6 gridservice-mapred的配置项</h3>
+<ul>
+          
+<li>external:如果被置为false,HOD必须在通过allocate命令分配的节点上自己创建Map/Reduce集群。如果被置为true,它会尝试连接外部已配置的Map/Reduce系统。</li>
+          
+<li>host:外部配置好的JobTracker的主机名。</li>
+          
+          
+<li>tracker_port:JobTracker RPC服务绑定的端口。</li>
+          
+          
+<li>info_port:JobTracker web UI服务绑定的端口。</li>
+          
+          
+<li>pkgs:安装目录,其下有bin/hadoop可执行文件。</li>
+          
+          
+<li>server-params:一个逗号分割的hadoop配置参数列表,每一项为key-value对形式。这些将用于产生被JobTracker和TaskTracker使用到的hadoop-site.xml文件。</li>
+          
+<li>final-server-params:除会被标记为final外和上面相同。</li>
+        
+</ul>
+<a name="N100E3"></a><a name="3.7+hodring%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9"></a>
+<h3 class="h4">3.7 hodring的配置项</h3>
+<ul>
+          
+<li>mapred-system-dir-root:DFS上的目录,HOD会在这个目录下创建子目录并把全路径作为参数'mapred.system.dir'的值传递给Hadoop守护进程。全路径的格式为value-of-this-option/userid/mapredsystem/cluster-id。注意,如果HDFS启用了权限,这里指定的路径下应允许所有用户创建子目录。设置此配置项的值为/user会使HOD使用用户的home目录来产生mapred.system.dir的值。</li>
+          
+<li>log-destination-uri:一个URL,指向一个外部的静态DFS,或者集群节点本地文件系统上的路径。当集群被回收时,HOD会把Hadoop日志上传到这个路径。要指定DFS路径,使用'hdfs://path'格式。要指定集群节点上的本地文件系统路径,使用'file://path'格式。当HOD回收集群时,作为HOD清除过程的一部分,hadoop日志会被删除。要持久存储这些日志,你可以使用这个配置项。路径的格式会是value-of-this-option/userid/hod-logs/cluster-id。注意,应该保证所有的用户能在这里指定的目录下创建子目录。把这个值设为hdfs://user会使这些日志被转移到用户在DFS上的home目录下。</li>
+          
+<li>pkgs:安装目录,其下有bin/hadoop可执行文件。如果给log-destination-uri指定了一个HDFS URL,HOD上传日志时会用到这个配置项。注意,当用户使用了和外部静态HDFS不同版本的tarball时,这个配置项会派上用场。本小节末尾附有一个示意片段。</li>
+                                      
+        
+</ul>
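+<p>一个假想的hodring段可能如下(其中的DFS路径、URL与安装目录均仅为示意):</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">[hodring]</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">mapred-system-dir-root = /mapredsystem</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">log-destination-uri = hdfs://host123:45678/user/hod/logs</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">pkgs = /usr/local/hadoop</td>
+</tr>
+</table>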
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were discarded because they are too large
+ 140 - 0
docs/cn/hod_config_guide.pdf


+ 1251 - 0
docs/cn/hod_user_guide.html

@@ -0,0 +1,1251 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>
+      Hadoop On Demand用户指南
+    </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">文档</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="hod_user_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>
+      Hadoop On Demand用户指南
+    </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%AE%80%E4%BB%8B">简介</a>
+</li>
+<li>
+<a href="#HOD%E4%BD%BF%E7%94%A8%E5%85%A5%E9%97%A8">HOD使用入门</a>
+<ul class="minitoc">
+<li>
+<a href="#%E4%B8%80%E4%B8%AA%E5%85%B8%E5%9E%8BHOD%E4%BC%9A%E8%AF%9D">一个典型HOD会话</a>
+</li>
+<li>
+<a href="#%E4%BD%BF%E7%94%A8HOD%E8%BF%90%E8%A1%8CHadoop%E8%84%9A%E6%9C%AC">使用HOD运行Hadoop脚本</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#HOD%E7%9A%84%E5%8A%9F%E8%83%BD">HOD的功能</a>
+<ul class="minitoc">
+<li>
+<a href="#%E4%BE%9B%E5%BA%94%E4%B8%8E%E7%AE%A1%E7%90%86Hadoop%E9%9B%86%E7%BE%A4">供应与管理Hadoop集群</a>
+</li>
+<li>
+<a href="#%E4%BD%BF%E7%94%A8tarball%E5%88%86%E5%8F%91Hadoop">使用tarball分发Hadoop</a>
+</li>
+<li>
+<a href="#%E4%BD%BF%E7%94%A8%E5%A4%96%E9%83%A8HDFS">使用外部HDFS</a>
+</li>
+<li>
+<a href="#%E9%85%8D%E7%BD%AEHadoop%E7%9A%84%E9%80%89%E9%A1%B9">配置Hadoop的选项</a>
+</li>
+<li>
+<a href="#%E6%9F%A5%E7%9C%8BHadoop%E7%9A%84Web-UI">查看Hadoop的Web-UI</a>
+</li>
+<li>
+<a href="#%E6%94%B6%E9%9B%86%E5%92%8C%E6%9F%A5%E7%9C%8BHadoop%E6%97%A5%E5%BF%97">收集和查看Hadoop日志</a>
+</li>
+<li>
+<a href="#%E9%97%B2%E7%BD%AE%E9%9B%86%E7%BE%A4%E7%9A%84%E8%87%AA%E5%8A%A8%E5%9B%9E%E6%94%B6">闲置集群的自动回收</a>
+</li>
+<li>
+<a href="#%E6%8C%87%E5%AE%9A%E9%A2%9D%E5%A4%96%E7%9A%84%E4%BD%9C%E4%B8%9A%E5%B1%9E%E6%80%A7">指定额外的作业属性</a>
+</li>
+<li>
+<a href="#%E6%8D%95%E8%8E%B7HOD%E5%9C%A8Torque%E4%B8%AD%E7%9A%84%E9%80%80%E5%87%BA%E7%A0%81">捕获HOD在Torque中的退出码</a>
+</li>
+<li>
+<a href="#%E5%91%BD%E4%BB%A4%E8%A1%8C">命令行</a>
+</li>
+<li>
+<a href="#HOD%E9%85%8D%E7%BD%AE%E9%80%89%E9%A1%B9"> HOD配置选项</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E6%95%85%E9%9A%9C%E6%8E%92%E9%99%A4">故障排除</a>
+<ul class="minitoc">
+<li>
+<a href="#%E5%88%86%E9%85%8D%E6%93%8D%E4%BD%9C%E6%97%B6">分配操作时hod挂起</a>
+</li>
+<li>
+<a href="#%E5%9B%9E%E6%94%B6%E6%93%8D%E4%BD%9C%E6%97%B6">回收操作时hod挂起</a>
+</li>
+<li>
+<a href="#%E5%A4%B1%E8%B4%A5%E6%97%B6%E7%9A%84%E9%94%99%E8%AF%AF%E4%BB%A3%E7%A0%81%E5%92%8C%E9%94%99%E8%AF%AF%E4%BF%A1%E6%81%AF">hod失败时的错误代码和错误信息</a>
+</li>
+<li>
+<a href="#Hadoop+DFSClient%E8%AD%A6%E5%91%8ANotReplicatedYetException%E4%BF%A1%E6%81%AF">Hadoop DFSClient警告NotReplicatedYetException信息</a>
+</li>
+<li>
+<a href="#%E6%88%90%E5%8A%9F%E5%88%86%E9%85%8D%E7%9A%84%E9%9B%86%E7%BE%A4%E4%B8%8A%E6%97%A0%E6%B3%95%E8%BF%90%E8%A1%8CHadoop%E4%BD%9C%E4%B8%9A">成功分配的集群上无法运行Hadoop作业</a>
+</li>
+<li>
+<a href="#%E6%88%91%E7%9A%84Hadoop%E4%BD%9C%E4%B8%9A%E8%A2%AB%E4%B8%AD%E6%AD%A2%E4%BA%86">我的Hadoop作业被中止了</a>
+</li>
+<li>
+<a href="#Hadoop%E4%BD%9C%E4%B8%9A%E5%A4%B1%E8%B4%A5%E5%B9%B6%E8%BF%94%E5%9B%9E%E6%B6%88%E6%81%AF%EF%BC%9A%E2%80%98Job+tracker+still+initializing%E2%80%99">Hadoop作业失败并返回消息:&lsquo;Job tracker still initializing&rsquo;</a>
+</li>
+<li>
+<a href="#Torque%E7%9A%84%E9%80%80%E5%87%BA%E4%BB%A3%E7%A0%81%E6%B2%A1%E6%9C%89%E5%8C%85%E5%90%ABHOD%E7%9A%84">Torque的退出代码没有包含HOD的</a>
+</li>
+<li>
+<a href="#Hadoop%E6%97%A5%E5%BF%97%E6%9C%AA%E8%A2%AB%E4%B8%8A%E4%BC%A0%E5%88%B0DFS">Hadoop日志未被上传到DFS</a>
+</li>
+<li>
+<a href="#%E5%AE%9A%E4%BD%8DRingmaster%E6%97%A5%E5%BF%97">定位Ringmaster日志</a>
+</li>
+<li>
+<a href="#%E5%AE%9A%E4%BD%8DHodring%E6%97%A5%E5%BF%97">定位Hodring日志</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+  
+<a name="N1000C"></a><a name="%E7%AE%80%E4%BB%8B"></a>
+<h2 class="h3">简介</h2>
+<div class="section">
+<a name="Introduction" id="Introduction"></a>
+<p>Hadoop On Demand (HOD)是一个能在大规模物理集群上供应虚拟Hadoop集群的系统。它使用Torque资源管理器进行节点分配。在所分配的节点上,它能启动Hadoop Map/Reduce以及HDFS守护进程。它能自动为Hadoop守护进程及客户端生成合适的配置文件(Hadoop-site.xml)。HOD还能够将Hadoop分发到它分配的虚拟集群节点上。总之,HOD方便管理者和用户快速安装与使用Hadoop。它也是需要在同一物理集群上测试各自版本的Hadoop开发者和测试者的实用工具。</p>
+<p>HOD支持Hadoop 0.15及其后续版本。</p>
+<p>后面的文档包括一个快速入门指南能让你快速上手HOD,一个所有HOD特性的详细手册,命令行选项,一些已知问题和故障排除的信息。</p>
+</div>
+  
+<a name="N1001E"></a><a name="HOD%E4%BD%BF%E7%94%A8%E5%85%A5%E9%97%A8"></a>
+<h2 class="h3">HOD使用入门</h2>
+<div class="section">
+<a name="Getting_Started_Using_HOD_0_4" id="Getting_Started_Using_HOD_0_4"></a>
+<p>在这部分,我们将会逐步骤地介绍使用HOD涉及到的最基本的操作。在开始遵循这些步骤之前,我们假定HOD及其依赖的软硬件均已被正确安装和配置。这步通常由集群的系统管理员负责。</p>
+<p>HOD的用户界面是一个命令行工具,叫做<span class="codefrag">hod</span>。它被一个通常由系统管理员为用户设置好的配置文件所驱动。用户在使用<span class="codefrag">hod</span>的时候可以覆盖这个配置,文档的后面会有介绍。使用<span class="codefrag">hod</span>时有如下两种方式可以指定配置文件:</p>
+<ul>
+    
+<li>在命令行中指定,使用 -c 选项。例如<span class="codefrag">hod &lt;operation&gt; &lt;required-args&gt; -c path-to-the-configuration-file [other-options]</span>
+</li>
+    
+<li>在运行<span class="codefrag">hod</span>的地方设置环境变量<em>HOD_CONF_DIR</em>。这个变量应指向一个本地目录,其中有名为<em>hodrc</em>的文件。这与Hadoop中的<em>HADOOP_CONF_DIR</em>与<em>hadoop-site.xml</em>文件是类似的。如果命令行中未指定配置文件,<span class="codefrag">hod</span>会查找<em>HOD_CONF_DIR</em>环境变量指定目录下的<em>hodrc</em>文件。</li>
+    
+</ul>
+<p>下面的例子中,我们将不会明确指出这个配置选项,假定其已正确指定。</p>
+<a name="N1005B"></a><a name="%E4%B8%80%E4%B8%AA%E5%85%B8%E5%9E%8BHOD%E4%BC%9A%E8%AF%9D"></a>
+<h3 class="h4">一个典型HOD会话</h3>
+<a name="HOD_Session" id="HOD_Session"></a>
+<p>一个典型HOD会话至少包括三个步骤:分配,执行Hadoop作业,回收。为此,执行如下步骤。</p>
+<p>
+<strong>创建一个集群目录</strong>
+</p>
+<a name="Create_a_Cluster_Directory" id="Create_a_Cluster_Directory"></a>
+<p>
+<em>集群目录</em>是本地文件系统上的一个目录,<span class="codefrag">hod</span>会为它分配的集群产生对应的Hadoop配置<em>hadoop-site.xml</em>,放在这个目录下。这个目录可以按下文所述方式传递给<span class="codefrag">hod</span>操作。如果这个目录不存在,HOD会自动创建这个目录。一旦分配好了集群,用户可通过Hadoop --config选项指定集群目录,在其上运行Hadoop作业。</p>
+<p>
+<strong><em>allocate</em>操作</strong>
+</p>
+<a name="Operation_allocate" id="Operation_allocate"></a>
+<p>
+<em>allocate</em>操作用来分配一组节点并在之上安装和提供Hadoop。它的语法如下。注意它要求指定参数集群目录(-d, --hod.clusterdir)和节点个数(-n, --hod.nodecount):</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes [OPTIONS]</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>如果命令成功执行,<span class="codefrag">cluster_dir/hadoop-site.xml</span>会被生成,文件中包含了分配出的集群的信息。它也会打印出关于Hadoop的web UI的信息。</p>
+<p>试运行这个命令会产生如下输出。注意在这个例子中集群目录是<span class="codefrag">~/hod-clusters/test</span>,我们要分配5个节点:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+    
+<tr>
+      
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d ~/hod-clusters/test -n 5</span>
+<br>
+      
+<span class="codefrag">INFO - HDFS UI on http://foo1.bar.com:53422</span>
+<br>
+      
+<span class="codefrag">INFO - Mapred UI on http://foo2.bar.com:55380</span>
+<br>
+</td>
+      
+</tr>
+   
+</table>
+<p>
+<strong>在分配的集群上执行Hadoop作业</strong>
+</p>
+<a name="Running_Hadoop_jobs_using_the_al" id="Running_Hadoop_jobs_using_the_al"></a>
+<p>现在,可以用一般的方式在分配的集群上执行Hadoop作业了。这里假定诸如<em>JAVA_HOME</em>以及指向Hadoop安装路径等环境变量已被正确地设置:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hadoop --config cluster_dir hadoop_command hadoop_command_args</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>或者</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ export HADOOP_CONF_DIR=cluster_dir</span> 
+<br>
+              
+<span class="codefrag">$ hadoop hadoop_command hadoop_command_args</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>继续我们的例子,下面的命令会在分配的集群上运行wordcount的例子:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hadoop --config ~/hod-clusters/test jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</span></td>
+</tr>
+</table>
+<p>或者</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+    
+<td colspan="1" rowspan="1"><span class="codefrag">$ export HADOOP_CONF_DIR=~/hod-clusters/test</span>
+<br>
+    
+<span class="codefrag">$ hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</span></td>
+    
+</tr>
+  
+</table>
+<p>
+<strong> <em>deallocate</em>操作</strong>
+</p>
+<a name="Operation_deallocate" id="Operation_deallocate"></a>
+<p>
+<em>deallocate</em>操作用来回收分配到的集群。当完成集群使用之后,必须执行回收操作使这些节点可以为其他用户所用。<em>deallocate</em>操作的语法如下。注意它需要集群目录(-d, --hod.clusterdir)作为参数:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod deallocate -d cluster_dir</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>继续我们的例子,如下命令会回收集群:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod deallocate -d ~/hod-clusters/test</span></td>
+</tr>
+</table>
+<p>如你所见,HOD允许用户分配一个集群,并随意地使用它来运行Hadoop作业。例如,用户可以从多个shell中使用同一个配置启动hadoop,从而在同一个集群上并发运行多个作业。</p>
+<a name="N10128"></a><a name="%E4%BD%BF%E7%94%A8HOD%E8%BF%90%E8%A1%8CHadoop%E8%84%9A%E6%9C%AC"></a>
+<h3 class="h4">使用HOD运行Hadoop脚本</h3>
+<a name="HOD_Script_Mode" id="HOD_Script_Mode"></a>
+<p>HOD的<em>script操作</em>能将集群的分配,使用和回收组织在一起。这对那些想运行Hadoop作业脚本,期望HOD能在脚本结束后自动完成清理操作的用户特别管用。用<span class="codefrag">hod</span>执行Hadoop脚本,需要这么做:</p>
+<p>
+<strong>创建脚本文件</strong>
+</p>
+<a name="Create_a_script_file" id="Create_a_script_file"></a>
+<p>这是一个普通的shell脚本,通常里面会包含hadoop命令,如:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hadoop jar jar_file options</span></td>
+  
+</tr>
+</table>
+<p>当然,用户可以向脚本中添加任何有效的命令。HOD会在执行这个脚本时自动地设置<em>HADOOP_CONF_DIR</em>指向分配的集群,用户不必对此担心(下面给出一个示意脚本)。不过,像分配操作时一样,用户需要指定一个集群目录。</p>
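+<p>下面是一个假想的示意脚本,其中的jar路径与输入输出目录仅为举例。由于hadoop命令在作业结束前不会返回,这样的脚本自然满足下文提到的&ldquo;等作业完成后才结束&rdquo;的要求:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">#!/bin/sh</span>
+<br>
+<span class="codefrag"># HOD已将HADOOP_CONF_DIR指向分配的集群,直接运行hadoop命令即可</span>
+<br>
+<span class="codefrag">hadoop fs -put /local/path/to/input input</span>
+<br>
+<span class="codefrag">hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount input output</span>
+<br>
+<span class="codefrag">hadoop fs -get output /local/path/to/output</span></td>
+</tr>
+</table>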
+<p>
+<strong>运行脚本</strong>
+</p>
+<a name="Running_the_script" id="Running_the_script"></a>
+<p>
+<em>脚本操作</em>的语法如下。注意它需要集群目录(-d, --hod.clusterdir),节点个数(-n, --hod.nodecount)以及脚本文件(-s, --hod.script)作为参数:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod script -d cluster_directory -n number_of_nodes -s script_file</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>注意一旦脚本执行完毕,HOD就会回收集群,这意味着脚本必须等到hadoop作业完成后才能结束。用户写脚本时必须注意这点。</p>
+</div>
+  
+<a name="N1016C"></a><a name="HOD%E7%9A%84%E5%8A%9F%E8%83%BD"></a>
+<h2 class="h3">HOD的功能</h2>
+<div class="section">
+<a name="HOD_0_4_Features" id="HOD_0_4_Features"></a><a name="N10174"></a><a name="%E4%BE%9B%E5%BA%94%E4%B8%8E%E7%AE%A1%E7%90%86Hadoop%E9%9B%86%E7%BE%A4"></a>
+<h3 class="h4">供应与管理Hadoop集群</h3>
+<a name="Provisioning_and_Managing_Hadoop" id="Provisioning_and_Managing_Hadoop"></a>
+<p>HOD主要功能是供应Hadoop的Map/Reduce和HDFS集群。这在入门一节已经做过描述。此外,要是还有节点可用,并且组织上也批准,一个用户可以在同一时间内使用HOD分配多个Map/Reduce集群。对于分配到的不同集群,用户需要为上面提到的<span class="codefrag">cluster_dir</span>参数指定不同的路径。HOD提供<em>list</em>和<em>info</em>操作可以管理多个集群。</p>
+<p>
+<strong><em>list</em>操作</strong>
+</p>
+<a name="Operation_list" id="Operation_list"></a>
+<p>list操作能列举到目前为止用户所创建的所有集群。存放hadoop-site.xml的集群目录,与JobTracker和/或HDFS的连接及状态也会被显示出来。list操作的使用语法如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod list</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>
+<strong><em>info</em>操作</strong>
+</p>
+<a name="Operation_info" id="Operation_info"></a>
+<p>info操作会显示指定集群相关的信息。这些信息包括Torque作业id,HOD Ringmaster进程,Hadoop的JobTracker和NameNode守护进程等重要守护进程的位置。info操作的语法如下。注意它需要集群目录(-d, --hod.clusterdir)作为参数:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+      
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod info -d cluster_dir</span></td>
+        
+</tr>
+      
+    
+</table>
+<p>
+<span class="codefrag">cluster_dir</span>应为前面<em>allocate</em>操作中指定的有效集群目录。</p>
+<a name="N101BE"></a><a name="%E4%BD%BF%E7%94%A8tarball%E5%88%86%E5%8F%91Hadoop"></a>
+<h3 class="h4">使用tarball分发Hadoop</h3>
+<a name="Using_a_tarball_to_distribute_Ha" id="Using_a_tarball_to_distribute_Ha"></a>
+<p>供应Hadoop时,HOD可以使用集群节点上已经安装好的Hadoop,也可以将hadoop的tarball作为供应操作的一部分在节点上进行分发和安装。如果使用tarball选项,就不必非得使用预装的Hadoop了,也不要求集群节点上必须有一个预装的版本。这对开发/QE环境下在一个共享集群上测试不同版本hadoop的开发者尤其有用。</p>
+<p>要使用预装的Hadoop,你必须在hodrc中的<span class="codefrag">gridservice-hdfs</span>部分和<span class="codefrag">gridservice-mapred</span>部分指定<span class="codefrag">pkgs</span>选项。它必须指向集群中所有节点上Hadoop的安装路径。</p>
+<p>指定Tarball的语法如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes -t hadoop_tarball_location</span></td>
+        
+</tr>
+    
+</table>
+<p>例如,下面的命令根据tarball<span class="codefrag">~/share/hadoop.tar.gz</span>分配Hadoop:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d ~/hadoop-cluster -n 10 -t ~/share/hadoop.tar.gz</span></td>
+</tr>
+</table>
+<p>类似地,使用hod脚本的语法如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod script -d cluster_directory -s scritp_file -n number_of_nodes -t hadoop_tarball_location</span></td>
+        
+</tr>
+    
+</table>
+<p>上面语法中指定的hadoop_tarball_location应指向从所有计算节点都可以访问的共享文件系统的路径。当前,HOD只支持挂载的NFS。</p>
+<p>
+<em>注意:</em>
+</p>
+<ul>
+    
+<li>为了获得更好分发性能,建议Hadoop tarball只包含库与二进制文件,不包含源代码或文档。</li>
+    
+<li>当你希望在用tarball方式分配的集群上执行作业时,你必须使用兼容的Hadoop版本提交作业。最好的方式是解压tarball,使用其中的版本。</li>
+    
+<li>你需要确保在tar分发包的conf目录下没有Hadoop配置文件hadoop-env.sh和hadoop-site.xml。如果这些文件存在并包含错误的值,集群分配可能会失败。
+</li>
+  
+</ul>
+<a name="N10214"></a><a name="%E4%BD%BF%E7%94%A8%E5%A4%96%E9%83%A8HDFS"></a>
+<h3 class="h4">使用外部HDFS</h3>
+<a name="Using_an_external_HDFS" id="Using_an_external_HDFS"></a>
+<p>在典型的由HOD提供的Hadoop集群中,HDFS已经被静态地(未使用HOD)设置好。这能使数据在HOD提供的集群被回收后还可以持久保存在HDFS中。为使用静态配置的HDFS,你的hodrc必须指向一个外部HDFS。具体就是,在hodrc的<span class="codefrag">gridservice-hdfs</span>部分将下面选项设置为正确的值:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">external = true</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">host = HDFS NameNode主机名</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">fs_port = HDFS NameNode端口</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">info_port = HDFS NameNode web UI的端口</td>
+</tr>
+</table>
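+<p>例如,一个填好取值的假想片段可能如下(主机名与端口仅为示意):</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">external = true</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">host = namenode.foo.bar.com</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">fs_port = 9000</td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1">info_port = 50070</td>
+</tr>
+</table>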
+<p>
+<em>注意:</em>你也可以从命令行开启这个选项。即,你这样去使用一个静态HDFS:<br>
+    
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes --gridservice-hdfs.external</span></td>
+        
+</tr>
+    
+</table>
+<p>如果需要,HOD既可以供应HDFS集群,也可以供应Map/Reduce集群。这需要设置hodrc中的<span class="codefrag">gridservice-hdfs</span>部分的下列选项:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1">external = false</td>
+</tr>
+</table>
+<a name="N10258"></a><a name="%E9%85%8D%E7%BD%AEHadoop%E7%9A%84%E9%80%89%E9%A1%B9"></a>
+<h3 class="h4">配置Hadoop的选项</h3>
+<a name="Options_for_Configuring_Hadoop" id="Options_for_Configuring_Hadoop"></a>
+<p>HOD提供一个非常方便的机制能配置它提供的Hadoop守护进程和它在客户端生成的hadoop-site.xml。通过在HOD配置文件中指定配置参数,或在分配集群时在命令行指定都可做到这点。</p>
+<p>
+<strong>配置Hadoop守护进程</strong>
+</p>
+<a name="Configuring_Hadoop_Daemons" id="Configuring_Hadoop_Daemons"></a>
+<p>要配置Hadoop守护进程,你可以这么做:</p>
+<p>对于Map/Reduce,将<span class="codefrag">gridservice-mapred</span>部分的<span class="codefrag">server-params</span>项的值指定为一个以逗号分割的key-value对列表。配置动态HDFS集群时,同样设置<span class="codefrag">gridservice-hdfs</span>部分的<span class="codefrag">server-params</span>项。如果这些参数应被标记成<em>final</em>,则将这些参数包含到相应部分的<span class="codefrag">final-server-params</span>项中。</p>
+<p>例如:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100,io.sort.mb=128,io.file.buffer.size=131072</span></td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">final-server-params = mapred.child.java.opts=-Xmx512m,dfs.block.size=134217728,fs.inmemory.size.mb=128</span></td>
+  
+</tr>
+</table>
+<p>要从命令行指定选项,你可以用如下语法:</p>
+<p>配置Map/Reduce守护进程:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes -Mmapred.reduce.parallel.copies=20 -Mio.sort.factor=100</span></td>
+        
+</tr>
+    
+</table>
+<p>在上述例子中,<em>mapred.reduce.parallel.copies</em>参数和<em>io.sort.factor</em>参数将会被添加到<span class="codefrag">server-params</span>中,如果已经在<span class="codefrag">server-params</span>中存在,则它们会被覆盖。要将这些参数指定成<em>final</em>类型,你可以:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes -Fmapred.reduce.parallel.copies=20 -Fio.sort.factor=100</span></td>
+        
+</tr>
+    
+</table>
+<p>不过,应注意final参数是无法被命令行覆盖的,只有尚未指定的参数才能以这种方式追加。</p>
+<p>配置动态供应的HDFS守护进程的选项与此相似,用-H替换-M,用-S替换-F即可。</p>
+<p>
+<strong>配置Hadoop的作业提交(客户端)程序</strong>
+</p>
+<a name="Configuring_Hadoop_Job_Submissio" id="Configuring_Hadoop_Job_Submissio"></a>
+<p>如上所述,当allocate操作成功后,<span class="codefrag">cluster_dir/hadoop-site.xml</span>将会生成,其中会包含分配的集群的JobTracker和NameNode的信息。这个配置用于向集群提交作业。HOD提供选项可将其它的hadoop配置参数添加到该文件,其语法如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes -Cmapred.userlog.limit.kb=200 -Cmapred.child.java.opts=-Xmx512m</span></td>
+        
+</tr>
+    
+</table>
+<p>上例中,<em>mapred.userlog.limit.kb</em>和<em>mapred.child.java.opts</em>会被添加到hod产生的hadoop-site.xml中。</p>
+<a name="N102EA"></a><a name="%E6%9F%A5%E7%9C%8BHadoop%E7%9A%84Web-UI"></a>
+<h3 class="h4">查看Hadoop的Web-UI</h3>
+<a name="Viewing_Hadoop_Web_UIs" id="Viewing_Hadoop_Web_UIs"></a>
+<p>HOD的allocate操作会打印出JobTracker和NameNode的Web UI的URL。例如:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d ~/hadoop-cluster -n 10 -c ~/hod-conf-dir/hodrc</span>
+<br>
+    
+<span class="codefrag">INFO - HDFS UI on http://host242.foo.com:55391</span>
+<br>
+    
+<span class="codefrag">INFO - Mapred UI on http://host521.foo.com:54874</span>
+    </td>
+</tr>
+</table>
+<p>上面提到的<em>info</em>操作可以给你同样的信息。</p>
+<a name="N1030C"></a><a name="%E6%94%B6%E9%9B%86%E5%92%8C%E6%9F%A5%E7%9C%8BHadoop%E6%97%A5%E5%BF%97"></a>
+<h3 class="h4">收集和查看Hadoop日志</h3>
+<a name="Collecting_and_Viewing_Hadoop_Lo" id="Collecting_and_Viewing_Hadoop_Lo"></a>
+<p>要获取在某些分配节点上运行的守护进程的Hadoop日志:</p>
+<ul>
+    
+<li>登录感兴趣的节点。如果你想查看JobTracker或者NameNode的日志,<em>list</em>和<em>info</em>操作能告诉你这些进程在那些节点上运行。</li>
+    
+<li>获取感兴趣的守护进程的进程信息(例如,<span class="codefrag">ps ux | grep TaskTracker</span>)</li>
+    
+<li>在这些进程信息中,查找变量<span class="codefrag">-Dhadoop.log.dir</span>的值。通常是hod配置文件里<span class="codefrag">hodring.temp-dir</span>目录的一个子目录 。</li>
+    
+<li>切换到<span class="codefrag">hadoop.log.dir</span>目录以查看守护进程日志和用户日志。</li>
+  
+</ul>
+<p>HOD也提供了一个机制,能让你在集群回收后将日志收集存放到文件系统,或者一个在外部配置的HDFS中。这样的话,在作业完成,节点回收后你还可以看这些日志。要做到这点,像下面一样为log-destination-uri指定一个URI:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">log-destination-uri= hdfs://host123:45678/user/hod/logs</span>或者</td>
+</tr>
+    
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">log-destination-uri= file://path/to/store/log/files</span></td>
+</tr>
+    
+</table>
+<p>在上面指定的的根目录中,HOD会创建路径user_name/torque_jobid,把作业涉及到的每个节点上的日志文件gzip压缩,存放在里面。</p>
+<p>注意要在HDFS上存储这些文件,你得将<span class="codefrag">hodring.pkgs</span>项配置为和刚才提到的HDFS兼容的版本。否则,HOD会尝试使用它供应Hadoop集群时用到的Hadoop版本。</p>
+<a name="N10355"></a><a name="%E9%97%B2%E7%BD%AE%E9%9B%86%E7%BE%A4%E7%9A%84%E8%87%AA%E5%8A%A8%E5%9B%9E%E6%94%B6"></a>
+<h3 class="h4">闲置集群的自动回收</h3>
+<a name="Auto_deallocation_of_Idle_Cluste" id="Auto_deallocation_of_Idle_Cluste"></a>
+<p>HOD会自动回收在一段时间内没有运行Hadoop作业的集群。每次的HOD分配会带有一个监控设施,不停地检查Hadoop作业的执行。如果侦测到在一定时间内没有Hadoop作业在执行,它就回收这个集群,释放那些未被有效利用的节点。</p>
+<p>
+<em>注意:</em>当集群被回收时,<em>集群目录</em>没有被自动清空。用户须通过一个正式的<em>deallocate</em>操作清理它。</p>
+<a name="N1036B"></a><a name="%E6%8C%87%E5%AE%9A%E9%A2%9D%E5%A4%96%E7%9A%84%E4%BD%9C%E4%B8%9A%E5%B1%9E%E6%80%A7"></a>
+<h3 class="h4">指定额外的作业属性</h3>
+<a name="Specifying_Additional_Job_Attrib" id="Specifying_Additional_Job_Attrib"></a>
+<p>HOD允许用户为一个Torque作业指定一个时钟时间和一个名称(或者标题)。 </p>
+<p>时钟时间是对Torque作业有效时间的一个估计。这个时间过期后,Torque将自动删除这个作业,释放其节点。指定这个时钟时间还能帮助作业调度程序更好的安排作业,提高对集群资源的使用率。</p>
+<p>指定时钟时间的语法如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes -l time_in_seconds</span></td>
+        
+</tr>
+    
+</table>
+<p>Torque作业的名称或标题能给用户以友好的作业标识。每次展示Torque作业的属性的时候,这个字符串就会出现,包括<span class="codefrag">qstat</span>命令。</p>
+<p>指定名称或标题的语法如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+          
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d cluster_dir -n number_of_nodes -N name_of_job</span></td>
+        
+</tr>
+    
+</table>
+<p>
+<em>注意:</em>由于底层Torque资源管理器的限制,不以字母开头或者包含空格的名字将导致作业失败。失败信息会表明问题存在于指定的作业名称中。</p>
+<a name="N103A2"></a><a name="%E6%8D%95%E8%8E%B7HOD%E5%9C%A8Torque%E4%B8%AD%E7%9A%84%E9%80%80%E5%87%BA%E7%A0%81"></a>
+<h3 class="h4">捕获HOD在Torque中的退出码</h3>
+<a name="Capturing_HOD_exit_codes_in_Torq" id="Capturing_HOD_exit_codes_in_Torq"></a>
+<p>HOD退出码出现在Torque的exit_status字段中。这有助于使用者和系统管理员区分成功的HOD执行和失败的HOD执行。如果分配成功且所有Hadoop作业在所分配的集群上正确的执行,退出码为0。如果分配失败或者部分hadoop作业在分配集群上运行失败,退出码非0。下表列出了可能出现的退出码。<em>注意:只有所使用的Hadoop版本是0.16或以上时,Hadoop作业状态才可以被捕获。</em>
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+    
+      
+<tr>
+        
+<td colspan="1" rowspan="1">退出码</td>
+        <td colspan="1" rowspan="1">含义</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 6 </td>
+        <td colspan="1" rowspan="1">Ringmaster故障</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 7 </td>
+        <td colspan="1" rowspan="1"> DFS故障</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 8 </td>
+        <td colspan="1" rowspan="1"> Job tracker故障</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 10 </td>
+        <td colspan="1" rowspan="1"> 集群死亡</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 12 </td>
+        <td colspan="1" rowspan="1"> 集群已分配 </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 13 </td>
+        <td colspan="1" rowspan="1"> HDFS死亡</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 14 </td>
+        <td colspan="1" rowspan="1"> Mapred死亡</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 16 </td>
+        <td colspan="1" rowspan="1">集群中所有的Map/Reduce作业失败。查看hadoop日志了解更多细节。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 17 </td>
+        <td colspan="1" rowspan="1">集群中部分的Map/Reduce作业失败。查看hadoop日志了解更多细节。</td>
+      
+</tr>
+  
+</table>
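+<p>作为示意,假设Torque被配置为保留已完成作业的记录,且Torque作业标识为12345(两者均为假设),则可以这样查看HOD的退出码:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ qstat -f 12345 | grep exit_status</span>
+<br>
+<span class="codefrag">exit_status = 0</span></td>
+</tr>
+</table>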
+<a name="N10434"></a><a name="%E5%91%BD%E4%BB%A4%E8%A1%8C"></a>
+<h3 class="h4">命令行</h3>
+<a name="Command_Line" id="Command_Line"></a>
+<p>HOD命令行的通用的语法如下:<br>
+      
+<em>hod &lt;operation&gt; [ARGS] [OPTIONS]<br>
+</em>
+      允许的操作有&lsquo;allocate&rsquo;,&lsquo;deallocate&rsquo;,&lsquo;info&rsquo;,&lsquo;list&rsquo;,&lsquo;script&rsquo;以及&lsquo;help&rsquo;。要获取某特定操作的帮助你可以执行:<span class="codefrag">hod help &lt;operation&gt;</span>。要查看可能的操作你可以执行<span class="codefrag">hod help options</span>。</p>
+<p>
+<em>allocate</em>
+<br>
+      
+<em>用法:hod allocate -d cluster_dir -n number_of_nodes [OPTIONS]</em>
+<br>
+      分配一个指定节点数目的集群,把分配信息存放在cluster_dir方便后续<span class="codefrag">hadoop</span>命令使用。注意<span class="codefrag">cluster_dir</span>必须在运行该命令前已经存在。</p>
+<p>
+<em>list</em>
+<br>
+      
+<em>用法:hod list [OPTIONS]</em>
+<br>
+      列举出用户分配的所有集群。提供的信息包括集群对应的Torque作业标识,存储分配信息的集群目录,Map/Reduce守护进程是否存活。</p>
+<p>
+<em>info</em>
+<br>
+      
+<em>用法:hod info -d cluster_dir [OPTIONS]</em>
+<br>
+      列举集群分配信息存放于某指定集群目录的集群信息。</p>
+<p>
+<em>deallocate</em>
+<br>
+      
+<em>用法:hod deallocate -d cluster_dir [OPTIONS]</em>
+<br>
+      回收集群分配信息存放于某指定集群目录的集群。</p>
+<p>
+<em>script</em>
+<br>
+      
+<em>用法:hod script -s script_file -d cluster_directory -n number_of_node [OPTIONS]</em>
+<br>
+      用HOD<em>script</em>操作执行一个hadoop脚本。在给定数目的节点上提供Hadoop,在提交的节点执行这个脚本,并在脚本执行结束后回收集群。</p>
+<p>
+<em>help</em>
+<br>
+      
+<em>用法:hod help [operation | 'options']</em>
+<br>
+      未指定参数时,<span class="codefrag">hod help</span>给出用法以及基本选项,等同于<span class="codefrag">hod --help</span> (见下文)。当指定参数&lsquo;options&rsquo;时,显示hod的基本选项。当指定operation时,它会显示出该特定operation的用法和相应的描述。例如,希望了解allocate操作,你可以执行<span class="codefrag">hod help allocate</span>。
+</p>
+<p>除上面的操作外,HOD还能接受下列命令行选项。</p>
+<p>
+<em>--help</em>
+<br>
+      打印出用法和基本选项的帮助信息。</p>
+<p>
+<em>--verbose-help</em>
+<br>
+      hodrc文件中所有的配置项均可通过命令行传递,使用语法<span class="codefrag">--section_name.option_name[=value]</span>。这种方式下,命令行传递的参数会覆盖hodrc中的配置项。verbose-help命令会列出hodrc文件中全部可用项。这也是一个了解配置选项含义的好方法。</p>
+<p>
+<a href="#Options_Configuring_HOD">下一部分</a>有多数重要的hod配置项的描述。对于基本选项,你可以通过<span class="codefrag">hod help options</span>了解,对于所有的hod配置中的可能选项,你可以参看<span class="codefrag">hod --verbose-help</span>的输出。了解所有选项的描述,请参看<a href="hod_config_guide.html">配置指南</a>。</p>
+<a name="N104BB"></a><a name="HOD%E9%85%8D%E7%BD%AE%E9%80%89%E9%A1%B9"></a>
+<h3 class="h4"> HOD配置选项</h3>
+<a name="Options_Configuring_HOD" id="Options_Configuring_HOD"></a>
+<p> 如上所述,HOD的配置是通过系统管理员设置的配置文件完成的。这是一个INI风格的配置文件,文件分成多个段,每个段包含一些配置项。这些段分别和HOD的进程:client,ringmaster,hodring,mapreduce或hdfs相关。每一个配置项由选项名和值构成。</p>
+<p>有两种方式可让用户覆盖默认配置文件里的设定:</p>
+<ul>
+    
+<li>在每条命令前,用户可以向HOD提供自己的配置文件,使用<span class="codefrag">-c</span>选项。</li>
+    
+<li>用户可以在命令行指定HOD的配置选项覆盖正使用的配置文件中提供的值。</li>
+  
+</ul>
+<p>这一节介绍一些最常用的配置项。为了指定方便,这些常用选项通常会有一个<em>短</em>选项名。所有其它选项可用随后介绍的<em>长</em>选项指定。</p>
+<p>
+<em>-c config_file</em>
+<br>
+  提供要使用的配置文件。可与其他任何的HOD选项一起使用。此外,可定义<span class="codefrag">HOD_CONF_DIR</span>环境变量为一个包含<span class="codefrag">hodrc</span>文件的目录,避免每条HOD命令都要指定配置文件。</p>
+<p>
+<em>-d cluster_dir</em>
+<br>
+  大多数hod操作都要求这个选项。如<a href="#Create_a_Cluster_Directory">此处</a>描述的,<em>集群目录</em>是本地文件系统上的一个目录,<span class="codefrag">hod</span>会将为所分配集群生成的Hadoop配置<em>hadoop-site.xml</em>放在这个目录里。使用-d或者--hod.clusterdir将这个参数传递给<span class="codefrag">hod</span>操作,如果目录不存在,HOD会自动创建该目录。集群分配好后,用户可通过指定hadoop --config为集群目录,在这个集群上执行Hadoop作业。</p>
+<p>
+<em>-n number_of_nodes</em>
+<br>
+  hod allocate操作和script操作要求这个选项,表示要分配的节点数。</p>
+<p>
+<em>-s script-file</em>
+<br>
+  脚本操作时需要,用于指定要执行的脚本文件。</p>
+<p>
+<em>-b 1|2|3|4</em>
+<br>
+  启用给定的调试级别。能与其他HOD选项一起使用。级别4最为详尽。</p>
+<p>
+<em>-t hadoop_tarball</em>
+<br>
+  从指定tar.gz文件提供Hadoop分发。此选项值只适用于<em>allocate</em>操作。为获得更好的分发性能,强烈推荐创建Hadoop tarball<em>前</em>删除其中的源代码或文档。</p>
+<p>
+<em>-N job-name</em>
+<br>
+  内部使用的资源管理作业名。比如,对于Torque作为资源管理器的情况,会被解释成<span class="codefrag">qsub -N</span>选项,使用<span class="codefrag">qstat</span>命令时可以看到这个作业名。</p>
+<p>
+<em>-l wall-clock-time</em>
+<br>
+  用户希望在分配的集群上工作的时钟时间总量。它被传递给HOD底层的资源管理器,用于更有效地调度和利用集群。注意对于Torque的情形,这个时间到期后,集群会被自动回收。</p>
+<p>
+<em>-j java-home</em>
+<br>
+  JAVA_HOME环境变量里指定的路径。在<em>script</em>操作中使用。HOD将JAVA_HOME环境变量设置为这个值,并在此环境下启动用户脚本。</p>
+<p>
+<em>-A account-string</em>
+<br>
+  传递给后台资源管理器的记账信息。</p>
+<p>
+<em>-Q queue-name</em>
+<br>
+  接受作业提交的后台资源管理器中队列的名称。</p>
+<p>
+<em>-Mkey1=value1 -Mkey2=value2</em>
+<br>
+  为供应的Map/Reduce守护进程(JobTracker以及TaskTracker)提供配置参数。在集群节点上,会根据这些值产生一个hadoop-site.xml。 <br>
+  
+<em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用&lsquo;\&rsquo;转义, 且放置在引号中。你也可以使用&lsquo;\&rsquo;来转义&lsquo;\&rsquo;。</p>
+<p>
+<em>-Hkey1=value1 -Hkey2=value2</em>
+<br>
+  为供应的HDFS守护进程(NameNode以及DataNode)提供配置参数。在集群节点上,会根据这些值产生一个hadoop-site.xml。 <br>
+  
+<em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用&lsquo;\&rsquo;转义, 且放置在引号中。你也可以使用&lsquo;\&rsquo;来转义&lsquo;\&rsquo;。</p>
+<p>
+<em>-Ckey1=value1 -Ckey2=value2</em>
+<br>
+  为提交作业的客户端提供配置参数。在提交节点上,会根据这些值产生一个hadoop-site.xml。<br>
+  
+<em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用&lsquo;\&rsquo;转义,且放置在引号中。你也可以使用&lsquo;\&rsquo;来转义&lsquo;\&rsquo;。下面附有一个转义的示例。 </p>
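+<p>下面是一个按上述转义规则书写的假想示例:参数值中含有一个空格,因此用&lsquo;\&rsquo;转义并将整个参数放在引号中(集群目录与参数取值仅为示意):</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ hod allocate -d ~/hod-clusters/test -n 5 -C"mapred.child.java.opts=-Xmx512m\ -Xms256m"</span></td>
+</tr>
+</table>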
+<p>
+<em>--section-name.option-name=value</em>
+<br>
+  这是用<em>长</em>格式提供配置选项的方法。比如,你可以指定<em>--hod.script-wait-time=20</em>
+</p>
+</div>
+
+<a name="N10572"></a><a name="%E6%95%85%E9%9A%9C%E6%8E%92%E9%99%A4"></a>
+<h2 class="h3">故障排除</h2>
+<div class="section">
+<a name="Troubleshooting" id="Troubleshooting"></a>
+<p>下节列出了一些用户使用HOD时可能碰到的常见错误情形,以及解决这些问题的方法。</p>
+<a name="N1057D"></a><a name="%E5%88%86%E9%85%8D%E6%93%8D%E4%BD%9C%E6%97%B6"></a>
+<h3 class="h4">分配操作时hod挂起</h3>
+<a name="_hod_Hangs_During_Allocation" id="_hod_Hangs_During_Allocation"></a><a name="hod_Hangs_During_Allocation" id="hod_Hangs_During_Allocation"></a>
+<p>
+<em>可能原因:</em>HOD或Hadoop的一个组件启动失败。这种情况下,<span class="codefrag">hod</span>命令会在一段时间(通常是2-3分钟)后返回,退出码是错误代码部分定义的错误码7或8。参考该部分以获得更多细节。 </p>
+<p>
+<em>可能原因:</em>使用tarball模式申请了大规模的集群。有时由于网络负载,或者是分配节点上的负载,tarball分发过程可能会慢得比较明显,需要几分钟才能响应。等待命令完成。还可以检查一下tarball,看是否不含Hadoop源码或文档。</p>
+<p>
+<em>可能原因:</em>Torque相关的问题。如果原因与Torque相关,<span class="codefrag">hod</span>命令5分钟内是不会返回的。在调试模式下运行<span class="codefrag">hod</span>你会发现<span class="codefrag">qstat</span>命令被重复执行。在另一个shell中执行<span class="codefrag">qstat</span>命令你会发现作业处于<span class="codefrag">Q</span>(排队)状态。这通常说明Torque出现了问题。可能原因有个别节点宕机,或者增加了新节点但Torque尚未感知。通常,需要系统管理员帮助解决此问题。</p>
+<a name="N105AB"></a><a name="%E5%9B%9E%E6%94%B6%E6%93%8D%E4%BD%9C%E6%97%B6"></a>
+<h3 class="h4">回收操作时hod挂起</h3>
+<a name="_hod_Hangs_During_Deallocation" id="_hod_Hangs_During_Deallocation"></a><a name="hod_Hangs_During_Deallocation" id="hod_Hangs_During_Deallocation"></a>
+<p>
+<em>可能原因:</em>Torque相关的问题,通常是Torque server上的负载较大,或者是分配的集群非常大。一般来说,你唯一能做的是等待命令执行完成。</p>
+<a name="N105BD"></a><a name="%E5%A4%B1%E8%B4%A5%E6%97%B6%E7%9A%84%E9%94%99%E8%AF%AF%E4%BB%A3%E7%A0%81%E5%92%8C%E9%94%99%E8%AF%AF%E4%BF%A1%E6%81%AF"></a>
+<h3 class="h4">hod失败时的错误代码和错误信息</h3>
+<a name="hod_Fails_With_an_error_code_and" id="hod_Fails_With_an_error_code_and"></a><a name="_hod_Fails_With_an_error_code_an" id="_hod_Fails_With_an_error_code_an"></a>
+<p>如果<span class="codefrag">hod</span>命令的退出码不是<span class="codefrag">0</span>,参考下面的退出代码表确定此情况发生的原因和相应的调试方法。</p>
+<p>
+<strong>错误代码</strong>
+</p>
+<a name="Error_Codes" id="Error_Codes"></a>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+    
+      
+<tr>
+        
+<th colspan="1" rowspan="1">错误代码</th>
+        <th colspan="1" rowspan="1">含义</th>
+        <th colspan="1" rowspan="1">可能原因及补救方法</th>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 1 </td>
+        <td colspan="1" rowspan="1">配置错误 </td>
+        <td colspan="1" rowspan="1">hodrc中的参数错误,或者其他与HOD配置相关的错误。此类情况下,错误信息已经足够帮你发现和解决问题。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 2 </td>
+        <td colspan="1" rowspan="1">无效操作</td>
+        <td colspan="1" rowspan="1">执行<span class="codefrag">hod help</span>查看有效的操作列表。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 3 </td>
+        <td colspan="1" rowspan="1">无效操作参数</td>
+        <td colspan="1" rowspan="1">执行<span class="codefrag">hod help operation</span>查看特定操作的用法。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 4 </td>
+        <td colspan="1" rowspan="1">调度失败</td>
+        <td colspan="1" rowspan="1"> 1. 请求分配了过多的资源。执行<span class="codefrag">checknodes cluster_name</span>查看是否有足够多的可用节点。<br>
+             2. 请求的资源超出了资源管理器的限制。<br>
+             3. Torque配置错误,Torque可执行文件路径配置错误,或者其它Torque相关问题。联系系统管理员。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 5 </td>
+        <td colspan="1" rowspan="1">执行作业失败</td>
+        <td colspan="1" rowspan="1"> 1. Torque作业被外部删除。执行Torque <span class="codefrag">qstat</span>命令查看是否有作业处于<span class="codefrag">R</span>(运行)状态。如果没有,尝试重新运行HOD。<br>
+          2. Torque的问题诸如服务器暂时性宕机,或者无响应。联系系统管理员。 <br>
+          3. 系统管理员可能配置了帐号核实,并且一个非法的帐号被指定。请联系系统管理员。 </td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 6 </td>
+        <td colspan="1" rowspan="1">Ringmaster故障</td>
+        <td colspan="1" rowspan="1"> HOD会打印信息"Cluster could not be allocated because of the following errors on the ringmaster host &lt;hostname&gt;"。实际的错误信息可能指示下列情形中的一种:<br>
+          1. 运行ringmaster的节点配置不合法,错误信息中的hostname会指明具体的机器。<br>
+          2. <span class="codefrag">ringmaster</span>段的配置无效,<br>
+          3. <span class="codefrag">gridservice-mapred或者gridservice-hdfs</span>段中<span class="codefrag">pkgs</span>项的配置无效,<br>
+          4. 无效的hadoop tarball,或者tarball中conf目录下存在无效的配置文件,<br>
+          5. Hadoop中的MapReduce与外部HDFS版本不匹配。<br>
+          Torque <span class="codefrag">qstat</span>命令很可能会显示一个出于<span class="codefrag">C</span>(Completed,已完成)状态的作业。<br>
+          你可以登录到HOD失败信息中给出的ringmaster主机,根据错误信息的提示解决问题。如果错误信息没有给出完整的信息,ringmaster日志也可能帮助找到问题的根源。参考下面<em>定位Ringmaster日志</em>一节了解更多信息。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 7 </td>
+        <td colspan="1" rowspan="1"> DFS故障</td>
+        <td colspan="1" rowspan="1"> 当HOD由于DFS故障(或者Job tracker失败,错误码8,下文有介绍)分配失败时,它会打印错误信息 "Hodring at &lt;hostname&gt; failed with following errors:",并给出真正的错误信息,这个信息可能表明下列情形中的一种:<br>
+	  1. 启动Hadoop集群时出现问题。通常错误信息会表明之前提到的主机出现错误的真正原因。你也要检查HOD配置中文件中Hadoop相关的配置。按上面<em>收集和查看Hadoop日志</em>一节中介绍的方法查看Hadoop的日志。<br>
+          2. 运行hodring的节点上的配置无效,错误信息中的hostname会指明机器<br>
+          3. hodrc中<span class="codefrag">hodring</span>段的配置无效。<span class="codefrag">ssh</span>到错误信息中提到的节点,在hdring日志中grep<span class="codefrag">ERROR</span>或<span class="codefrag">CRITICAL</span>。参考下面<em>定位Hodring日志</em>部分获取更多信息。<br>
+	  4. 指定了无效的tarball,可能未正确打包。<br>
+          5. 无法与外部配置的HDFS通信。<br>
+          当DFS或Job tracker出现故障时,你可以登录到HOD失败信息中提到的主机上,进行debug。解决问题的时候,你也应通过查看ringmaster日志中的其它日志信息,来检查其他机器是否在启动jobtracker/namenode时也出现了问题,而不只是检查错误信息中提到的主机。其他机器也可能发生问题是因为HOD会按照配置项<a href="hod_config_guide.html#3.4+ringmaster%E7%9A%84%E9%85%8D%E7%BD%AE%E9%A1%B9">ringmaster.max-master-failures</a>的设置在多个机器上连续尝试和启动hadoop守护进程。更多关于ringmaster日志的信息请参考下文<em>定位Ringmaster日志</em>。
+</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 8 </td>
+        <td colspan="1" rowspan="1">Job tracker故障</td>
+        <td colspan="1" rowspan="1">与<em>DFS故障</em>情形中的原因类似。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 10 </td>
+        <td colspan="1" rowspan="1">集群死亡</td>
+        <td colspan="1" rowspan="1">1. 集群因为较长时间空闲被自动回收。<br>
+          2. 集群因系统管理员或者用户指定的时钟时间到期被自动回收。<br>
+          3. 无法与成功分配的JobTracker以及HDFS的NameNode通信。回收集群,重新分配。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 12 </td>
+        <td colspan="1" rowspan="1">集群已分配</td>
+        <td colspan="1" rowspan="1">指定的集群目录是已被用于先前的分配操作,且尚未回收。指定另外一个目录,或者先回收先前分配的。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 13 </td>
+        <td colspan="1" rowspan="1">HDFS死亡</td>
+        <td colspan="1" rowspan="1">无法与HDFS的NameNode通信。HDFS的NameNode停掉了。</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 14 </td>
+        <td colspan="1" rowspan="1">Mapred死亡</td>
+        <td colspan="1" rowspan="1"> 1. 集群因为长时间闲置被自动回收。 <br>
+          2. 集群因系统管理员或用户指定的时钟时间到期被自动回收。<br>
+	  3. 无法与Map/Reduce的JobTracker通信。JobTracker节点宕机。 <br>
+          
+</td>
+      
+</tr>
+      
+<tr>
+        
+<td colspan="1" rowspan="1"> 15 </td>
+        <td colspan="1" rowspan="1">集群未分配</td>
+        <td colspan="1" rowspan="1">一个需要已分配集群的操作被指以一个没有状态信息的集群目录。</td>
+      
+</tr>
+   
+      
+<tr>
+        
+<td colspan="1" rowspan="1">任意非0退出代码</td>
+        <td colspan="1" rowspan="1">HOD脚本错误</td>
+        <td colspan="1" rowspan="1">如果使用了hod的脚本选项,很可能这个退出代码是脚本的退出吗。不幸的是,这可能会与hod自己的退出码冲突。为帮助用户区分两者,如果脚本返回了一个退出码,hod将此退出码写到了集群目录下的script.exitcode文件。你可以cat这个文件以确定脚本的退出码。如果文件不存在,则退出代码是hod命令的退出码。</td> 
+      
+</tr>
+  
+</table>
+<a name="N10752"></a><a name="Hadoop+DFSClient%E8%AD%A6%E5%91%8ANotReplicatedYetException%E4%BF%A1%E6%81%AF"></a>
+<h3 class="h4">Hadoop DFSClient警告NotReplicatedYetException信息</h3>
+<p>有时,当你申请到一个HOD集群后马上尝试上传文件到HDFS时,DFSClient会警告NotReplicatedYetException。通常会有一个这样的信息 - </p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">WARN
+hdfs.DFSClient: NotReplicatedYetException sleeping &lt;filename&gt; retries
+left 3</span></td>
+</tr>
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">08/01/25 16:31:40 INFO hdfs.DFSClient:
+org.apache.hadoop.ipc.RemoteException: java.io.IOException: File
+&lt;filename&gt; could only be replicated to 0 nodes, instead of
+1</span></td>
+</tr>
+</table>
+<p> 当DataNode还处于和NameNode建立联系的过程中时,你就向集群上传文件,就会发生这种现象。在上传新文件到HDFS之前多等待一段时间就可以解决这个问题,因为这使得足够多的DataNode启动并且联络上了NameNode。</p>
+<a name="N1076A"></a><a name="%E6%88%90%E5%8A%9F%E5%88%86%E9%85%8D%E7%9A%84%E9%9B%86%E7%BE%A4%E4%B8%8A%E6%97%A0%E6%B3%95%E8%BF%90%E8%A1%8CHadoop%E4%BD%9C%E4%B8%9A"></a>
+<h3 class="h4">成功分配的集群上无法运行Hadoop作业</h3>
+<a name="Hadoop_Jobs_Not_Running_on_a_Suc" id="Hadoop_Jobs_Not_Running_on_a_Suc"></a>
+<p>这一情景通常发生在这种情形:一个集群已经分配,并且一段时间内处于不活跃状态,之后hadoop作业试图在这个集群上运行。Hadoop作业会失败,产生如下异常信息:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">08/01/25 16:31:40 INFO ipc.Client: Retrying connect to server: foo.bar.com/1.1.1.1:53567. Already tried 1 time(s).</span></td>
+</tr>
+</table>
+<p>
+<em>可能原因:</em>相当长的时间内无hadoop作业运行,集群会如<em>闲置集群的自动回收</em>一节介绍的那样被自动回收。回收该集群,然后重新分配。</p>
+<p>
+<em>可能原因:</em>从分配开始算起,Torque管理员指定的或<em>指定额外的作业属性</em>一节中定义的<span class="codefrag">-l</span>选项指定的时间上限过期。这种情况下集群可能已被释放。回收集群,然后重新分配。</p>
+<p>
+<em>可能原因:</em>提交作业使用的hadoop版本和供应集群的Hadoop版本(通常通过tarball选项)不匹配。确保使用兼容的版本。</p>
+<p>
+<em>可能原因:</em> 你使用了<span class="codefrag">-M</span>或<span class="codefrag">-H</span>中的一个指定Hadoop配置,其中有未正确转义的字符,比如空格或逗号。参考<em>HOD配置选项</em>一节以了解如何正确指定这些选项。</p>
+<a name="N107A5"></a><a name="%E6%88%91%E7%9A%84Hadoop%E4%BD%9C%E4%B8%9A%E8%A2%AB%E4%B8%AD%E6%AD%A2%E4%BA%86"></a>
+<h3 class="h4">我的Hadoop作业被中止了</h3>
+<a name="My_Hadoop_Job_Got_Killed" id="My_Hadoop_Job_Got_Killed"></a>
+<p>
+<em>可能原因:</em>从分配开始算起,Torque管理员指定的或<em>指定额外的作业属性</em>一节中定义的<span class="codefrag">-l</span>选项指定的时间上限过期。这种情况下集群可能已被释放。回收集群,然后重新分配,这次要指定一个大点儿的时钟时间。</p>
+<p>
+<em>可能原因:</em> JobTracker节点出现问题。参考<em>收集和查看Hadoop日志</em>一节以获取更多信息。</p>
+<a name="N107C0"></a><a name="Hadoop%E4%BD%9C%E4%B8%9A%E5%A4%B1%E8%B4%A5%E5%B9%B6%E8%BF%94%E5%9B%9E%E6%B6%88%E6%81%AF%EF%BC%9A%E2%80%98Job+tracker+still+initializing%E2%80%99"></a>
+<h3 class="h4">Hadoop作业失败并返回消息:&lsquo;Job tracker still initializing&rsquo;</h3>
+<a name="Hadoop_Job_Fails_with_Message_Jo" id="Hadoop_Job_Fails_with_Message_Jo"></a>
+<p>
+<em>可能原因:</em>hadoop作业是作为HOD脚本的一部分运行的,它在JobTracker完全就绪前开始了执行。分配集群时为配置选项<span class="codefrag">--hod.script-wait-time</span>设定一个大点儿的值。通常取120即可,尽管一般没必要这么大。</p>
+<a name="N107D0"></a><a name="Torque%E7%9A%84%E9%80%80%E5%87%BA%E4%BB%A3%E7%A0%81%E6%B2%A1%E6%9C%89%E5%8C%85%E5%90%ABHOD%E7%9A%84"></a>
+<h3 class="h4">Torque的退出代码没有包含HOD的</h3>
+<a name="The_Exit_Codes_For_HOD_Are_Not_G" id="The_Exit_Codes_For_HOD_Are_Not_G"></a>
+<p>
+<em>可能原因:</em>此功能需要Hadoop 0.16。所用的Hadoop版本不满足这个条件。请使用合适的Hadoop版本。</p>
+<p>
+<em>可能原因:</em>没有使用<span class="codefrag">hod</span>命令回收集群;例如直接使用<span class="codefrag">qdel</span>。当使用这种方式回收集群时,HOD进程被信号中止。这会导致退出码是基于signal number的,而不是程序的退出码。</p>
+<a name="N107E8"></a><a name="Hadoop%E6%97%A5%E5%BF%97%E6%9C%AA%E8%A2%AB%E4%B8%8A%E4%BC%A0%E5%88%B0DFS"></a>
+<h3 class="h4">Hadoop日志未被上传到DFS</h3>
+<a name="The_Hadoop_Logs_are_Not_Uploaded" id="The_Hadoop_Logs_are_Not_Uploaded"></a>
+<p>
+<em>可能原因:</em>上传日志的使用的hadoop与外部的HDFS版本不兼容。确保<span class="codefrag">hodring.pkgs</span>选项指定了正确的版本。</p>
+<a name="N107F8"></a><a name="%E5%AE%9A%E4%BD%8DRingmaster%E6%97%A5%E5%BF%97"></a>
+<h3 class="h4">定位Ringmaster日志</h3>
+<a name="Locating_Ringmaster_Logs" id="Locating_Ringmaster_Logs"></a>
+<p>遵循以下步骤定位ringmaster日志(列表之后附有一个完整的操作示例):</p>
+<ul>
+    
+<li>用-b选项在调试模式执行hod。这会打印出当前运行的Torque作业的标识。</li>
+    
+<li>执行<span class="codefrag">qstat -f torque_job_id</span>,在输出中查找<span class="codefrag">exec_host</span>参数的值。列表中的第一个主机就是ringmaster节点。</li>
+    
+<li>登陆该节点。</li>
+  
+<li>ringmaster日志的位置由hodrc中的<span class="codefrag">ringmaster.log-dir</span>项指定。日志文件的名字会是<span class="codefrag">username.torque_job_id/ringmaster-main.log</span>。</li>
+    
+<li>如果你没有获取到足够的信息,你可以将ringmaster的调试级别设为4。这可通过向hod命令行传递<span class="codefrag">--ringmaster.debug 4</span>做到。</li>
+  
+</ul>
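+<p>举例来说,假设调试输出中的Torque作业标识为12345,用户名为hoduser,hodrc中ringmaster.log-dir为/var/log/hod(这些取值均为假设),则定位过程大致如下:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+<tr>
+<td colspan="1" rowspan="1"><span class="codefrag">$ qstat -f 12345 | grep exec_host</span>
+<br>
+<span class="codefrag">$ ssh &lt;exec_host列表中的第一个主机&gt;</span>
+<br>
+<span class="codefrag">$ less /var/log/hod/hoduser.12345/ringmaster-main.log</span></td>
+</tr>
+</table>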
+<a name="N10824"></a><a name="%E5%AE%9A%E4%BD%8DHodring%E6%97%A5%E5%BF%97"></a>
+<h3 class="h4">定位Hodring日志</h3>
+<a name="Locating_Hodring_Logs" id="Locating_Hodring_Logs"></a>
+<p>遵循以下步骤定位hodring日志:</p>
+<ul>
+    
+<li>用-b选项在调试模式下运行hod。这将打印当前运行的Torque作业的标识。</li>
+    
+<li>执行<span class="codefrag">qstat -f torque_job_id</span>,查看输出中<span class="codefrag">exec_host</span>参数的值。列表中的所有节点上都有一个hodring。</li>
+    
+<li>登陆到任何一个节点。</li>
+    
+<li>hodring日志的位置由hodrc中的<span class="codefrag">hodring.log-dir</span>项指定。日志文件的名字会是<span class="codefrag">username.torque_job_id/hodring-main.log</span>。</li>
+    
+<li>如果你没有获得足够的信息,你或许想将hodring的调试等级更改为4。这可以通过向hod命令行传递<span class="codefrag">--hodring.debug 4</span>来做到。</li>
+  
+</ul>
+</div>	
+
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes were discarded because they are too large
+ 358 - 0
docs/cn/hod_user_guide.pdf


BIN
docs/cn/images/built-with-forrest-button.png


BIN
docs/cn/images/core-logo.gif


BIN
docs/cn/images/favicon.ico


BIN
docs/cn/images/hadoop-logo.jpg


BIN
docs/cn/images/hdfsarchitecture.gif


BIN
docs/cn/images/hdfsdatanodes.gif


BIN
docs/cn/images/instruction_arrow.png


+ 268 - 0
docs/cn/index.html

@@ -0,0 +1,268 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop文档</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menupage">
+<div class="menupagetitle">概述</div>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit">
+<hr>
+<a href="http://forrest.apache.org/"><img border="0" title="Built with Apache Forrest" alt="Built with Apache Forrest - logo" src="images/built-with-forrest-button.png" style="width: 88px;height: 31px;"></a>
+</div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="index.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop文档</h1>
+    
+<p>
+	下面的文档是一些概念介绍和操作教程,可帮助你开始使用Hadoop。如果遇到了问题,你可以向<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>求助或者浏览一下存档邮件。
+    </p>
+    
+<ul>
+      
+<li>
+<a href="quickstart.html">Hadoop快速入门</a>
+</li>
+      
+<li>
+<a href="cluster_setup.html">Hadoop集群搭建</a>
+</li>
+      
+<li>
+<a href="hdfs_design.html">Hadoop分布式文件系统</a>
+</li>
+      
+<li>
+<a href="mapred_tutorial.html">Hadoop Map-Reduce教程</a>
+</li>
+      
+<li>
+<a href="native_libraries.html">Hadoop本地库</a>
+</li>
+      
+<li>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</li>
+      
+<li>
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</li>
+      
+<li>
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</li>
+    
+</ul>
+    
+<p>
+    
+</p>
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<div id="logos"></div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes are not shown because the diff is too large
+ 160 - 0
docs/cn/index.pdf


+ 380 - 0
docs/cn/linkmap.html

@@ -0,0 +1,380 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Site Linkmap Table of Contents</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">文档</div>
+<div id="menu_1.1" class="menuitemgroup">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="linkmap.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Site Linkmap Table of Contents</h1>
+<p>
+          This is a map of the complete site and its structure.
+        </p>
+<ul>
+<li>
+<a>Hadoop</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>site</em>
+</li>
+<ul>
+
+  
+<ul>
+<li>
+<a>文档</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>docs</em>
+</li>
+<ul> 
+    
+<ul>
+<li>
+<a href="index.html">概述</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>overview</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="quickstart.html">快速入门</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>quickstart</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="cluster_setup.html">集群搭建</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>setup</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hdfs_design.html">HDFS构架设计</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>hdfs</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hdfs_user_guide.html">HDFS使用指南</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>hdfs</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>hdfs</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>hdfs</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="commands_manual.html">命令手册</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>commands</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hdfs_shell.html">FS Shell使用指南</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>fs</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="distcp.html">DistCp使用指南</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>distcp</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="mapred_tutorial.html">Map-Reduce教程</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>mapred</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="native_libraries.html">Hadoop本地库</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>mapred</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="streaming.html">Streaming</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>streaming</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hadoop_archives.html">Hadoop Archives</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>archives</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="hod.html">Hadoop On Demand</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>hod</em>
+</li>
+<ul>
+      
+      
+      
+    
+</ul>
+</ul>
+    
+<ul>
+<li>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>api</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>jdiff</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="http://wiki.apache.org/hadoop/">维基</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>wiki</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>faq</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>lists</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>relnotes</em>
+</li>
+</ul>
+    
+<ul>
+<li>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>changes</em>
+</li>
+</ul>
+  
+</ul>
+</ul>
+
+ 
+ 
+
+</ul>
+</ul>
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

These changes are not shown because the diff is too large
+ 62 - 0
docs/cn/linkmap.pdf


+ 3464 - 0
docs/cn/mapred_tutorial.html

@@ -0,0 +1,3464 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop Map/Reduce教程</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Map-Reduce教程</div>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="mapred_tutorial.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop Map/Reduce教程</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%9B%AE%E7%9A%84">目的</a>
+</li>
+<li>
+<a href="#%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6">先决条件</a>
+</li>
+<li>
+<a href="#%E6%A6%82%E8%BF%B0">概述</a>
+</li>
+<li>
+<a href="#%E8%BE%93%E5%85%A5%E4%B8%8E%E8%BE%93%E5%87%BA">输入与输出</a>
+</li>
+<li>
+<a href="#%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v1.0">例子:WordCount v1.0</a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%BA%90%E4%BB%A3%E7%A0%81">源代码</a>
+</li>
+<li>
+<a href="#%E7%94%A8%E6%B3%95">用法</a>
+</li>
+<li>
+<a href="#%E8%A7%A3%E9%87%8A">解释</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Map%2FReduce+-+%E7%94%A8%E6%88%B7%E7%95%8C%E9%9D%A2">Map/Reduce - 用户界面</a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%A0%B8%E5%BF%83%E5%8A%9F%E8%83%BD%E6%8F%8F%E8%BF%B0">核心功能描述</a>
+<ul class="minitoc">
+<li>
+<a href="#Mapper">Mapper</a>
+</li>
+<li>
+<a href="#Reducer">Reducer</a>
+</li>
+<li>
+<a href="#Partitioner">Partitioner</a>
+</li>
+<li>
+<a href="#Reporter">Reporter</a>
+</li>
+<li>
+<a href="#OutputCollector">OutputCollector</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E4%BD%9C%E4%B8%9A%E9%85%8D%E7%BD%AE">作业配置</a>
+</li>
+<li>
+<a href="#%E4%BB%BB%E5%8A%A1%E7%9A%84%E6%89%A7%E8%A1%8C%E5%92%8C%E7%8E%AF%E5%A2%83">任务的执行和环境</a>
+</li>
+<li>
+<a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8F%90%E4%BA%A4%E4%B8%8E%E7%9B%91%E6%8E%A7">作业的提交与监控</a>
+<ul class="minitoc">
+<li>
+<a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8E%A7%E5%88%B6">作业的控制</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%85%A5">作业的输入</a>
+<ul class="minitoc">
+<li>
+<a href="#InputSplit">InputSplit</a>
+</li>
+<li>
+<a href="#RecordReader">RecordReader</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%87%BA">作业的输出</a>
+<ul class="minitoc">
+<li>
+<a href="#%E4%BB%BB%E5%8A%A1%E7%9A%84Side-Effect+File">任务的Side-Effect File</a>
+</li>
+<li>
+<a href="#RecordWriter">RecordWriter</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E5%85%B6%E4%BB%96%E6%9C%89%E7%94%A8%E7%9A%84%E7%89%B9%E6%80%A7">其他有用的特性</a>
+<ul class="minitoc">
+<li>
+<a href="#Counters">Counters</a>
+</li>
+<li>
+<a href="#DistributedCache">DistributedCache</a>
+</li>
+<li>
+<a href="#Tool">Tool</a>
+</li>
+<li>
+<a href="#IsolationRunner">IsolationRunner</a>
+</li>
+<li>
+<a href="#Profiling">Profiling</a>
+</li>
+<li>
+<a href="#%E8%B0%83%E8%AF%95">调试</a>
+</li>
+<li>
+<a href="#JobControl">JobControl</a>
+</li>
+<li>
+<a href="#%E6%95%B0%E6%8D%AE%E5%8E%8B%E7%BC%A9">数据压缩</a>
+</li>
+</ul>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v2.0">例子:WordCount v2.0</a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%BA%90%E4%BB%A3%E7%A0%81-N10DC0">源代码</a>
+</li>
+<li>
+<a href="#%E8%BF%90%E8%A1%8C%E6%A0%B7%E4%BE%8B">运行样例</a>
+</li>
+<li>
+<a href="#%E7%A8%8B%E5%BA%8F%E8%A6%81%E7%82%B9">程序要点</a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+  
+    
+<a name="N1000D"></a><a name="%E7%9B%AE%E7%9A%84"></a>
+<h2 class="h3">目的</h2>
+<div class="section">
+<p>这篇教程从用户的角度出发,全面地介绍了Hadoop Map/Reduce框架的各个方面。</p>
+</div>
+    
+    
+<a name="N10017"></a><a name="%E5%85%88%E5%86%B3%E6%9D%A1%E4%BB%B6"></a>
+<h2 class="h3">先决条件</h2>
+<div class="section">
+<p>请先确认Hadoop被正确安装、配置和正常运行中。更多信息见:</p>
+<ul>
+        
+<li>
+          
+<a href="quickstart.html">Hadoop快速入门</a>对初次使用者。
+        </li>
+        
+<li>
+          
+<a href="cluster_setup.html">Hadoop集群搭建</a>对大规模分布式集群。
+        </li>
+      
+</ul>
+</div>
+    
+    
+<a name="N10032"></a><a name="%E6%A6%82%E8%BF%B0"></a>
+<h2 class="h3">概述</h2>
+<div class="section">
+<p>Hadoop Map/Reduce是一个使用简易的软件框架,基于它写出来的应用程序能够运行在由上千个商用机器组成的大型集群上,并以一种可靠容错的方式并行处理上T级别的数据集。</p>
+<p>一个Map/Reduce <em>作业(job)</em> 通常会把输入的数据集切分为若干独立的数据块,由
+      <em>map任务(task)</em>以完全并行的方式处理它们。框架会对map的输出先进行排序,
+      然后把结果输入给<em>reduce任务</em>。通常作业的输入和输出都会被存储在文件系统中。
+      整个框架负责任务的调度和监控,以及重新执行已经失败的任务。</p>
+<p>通常,Map/Reduce框架和<a href="hdfs_design.html">分布式文件系统</a>是运行在一组相同的节点上的,也就是说,计算节点和存储节点通常在一起。这种配置允许框架在那些已经存好数据的节点上高效地调度任务,这可以使整个集群的网络带宽被非常高效地利用。</p>
+<p>Map/Reduce框架由一个单独的master <span class="codefrag">JobTracker</span> 和每个集群节点一个slave <span class="codefrag">TaskTracker</span>共同组成。master负责调度构成一个作业的所有任务,这些任务分布在不同的slave上,master监控它们的执行,重新执行已经失败的任务。而slave仅负责执行由master指派的任务。</p>
+<p>应用程序至少应该指明输入/输出的位置(路径),并通过实现合适的接口或抽象类提供map和reduce函数。再加上其他作业的参数,就构成了<em>作业配置(job configuration)</em>。然后,Hadoop的 <em>job client</em>提交作业(jar包/可执行程序等)和配置信息给<span class="codefrag">JobTracker</span>,后者负责分发这些软件和配置信息给slave、调度任务并监控它们的执行,同时提供状态和诊断信息给job-client。</p>
+<p>虽然Hadoop框架是用Java<sup>TM</sup>实现的,但Map/Reduce应用程序则不一定要用
+      Java来写。</p>
+<ul>
+        
+<li>
+          
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/streaming/package-summary.html">
+          Hadoop Streaming</a>是一种运行作业的实用工具,它允许用户创建和运行任何可执行程序
+          (例如:Shell工具)来做为mapper和reducer。
+        </li>
+        
+<li>
+          
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/pipes/package-summary.html">
+          Hadoop Pipes</a>是一个与<a href="http://www.swig.org/">SWIG</a>兼容的C++ API
+          (没有基于JNI<sup>TM</sup>技术),它也可用于实现Map/Reduce应用程序。
+        </li>
+      
+</ul>
+</div>
+    
+    
+<a name="N10082"></a><a name="%E8%BE%93%E5%85%A5%E4%B8%8E%E8%BE%93%E5%87%BA"></a>
+<h2 class="h3">输入与输出</h2>
+<div class="section">
+<p>Map/Reduce框架运转在<span class="codefrag">&lt;key, value&gt;</span> 键值对上,也就是说,
+      框架把作业的输入看为是一组<span class="codefrag">&lt;key, value&gt;</span> 键值对,同样也产出一组
+      <span class="codefrag">&lt;key, value&gt;</span> 键值对做为作业的输出,这两组键值对的类型可能不同。</p>
+<p>框架需要对<span class="codefrag">key</span>和<span class="codefrag">value</span>的类(classes)进行序列化操作,
+      因此,这些类需要实现 <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/Writable.html">Writable</a>接口。
+      另外,为了方便框架执行排序操作,<span class="codefrag">key</span>类必须实现
+      <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/WritableComparable.html">
+      WritableComparable</a>接口。
+      </p>
+<p>一个Map/Reduce 作业的输入和输出类型如下所示:</p>
+<p>
+        (input) <span class="codefrag">&lt;k1, v1&gt;</span> 
+        -&gt; 
+        <strong>map</strong> 
+        -&gt; 
+        <span class="codefrag">&lt;k2, v2&gt;</span> 
+        -&gt; 
+        <strong>combine</strong> 
+        -&gt; 
+        <span class="codefrag">&lt;k2, v2&gt;</span> 
+        -&gt; 
+        <strong>reduce</strong> 
+        -&gt; 
+        <span class="codefrag">&lt;k3, v3&gt;</span> (output)
+      </p>
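+<p>For example, a key type only has to serialize its fields and define a sort order.
+        The sketch below is illustrative only and is not part of the Hadoop code base; the class
+        <span class="codefrag">YearTemperatureKey</span> and its fields are invented for the purpose:</p>
+<pre class="code">
+package org.myorg;
+
+import java.io.*;
+import org.apache.hadoop.io.*;
+
+// A hypothetical composite key: Writable so the framework can serialize it,
+// and comparable because intermediate keys are sorted.
+public class YearTemperatureKey implements WritableComparable {
+  private int year;
+  private int temperature;
+
+  public void write(DataOutput out) throws IOException {    // serialize
+    out.writeInt(year);
+    out.writeInt(temperature);
+  }
+  public void readFields(DataInput in) throws IOException { // deserialize
+    year = in.readInt();
+    temperature = in.readInt();
+  }
+  public int compareTo(Object o) {                           // sort order
+    YearTemperatureKey other = (YearTemperatureKey) o;
+    return (year != other.year) ? (year - other.year)
+                                : (temperature - other.temperature);
+  }
+}
+</pre>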
+</div>
+
+    
+<a name="N100C4"></a><a name="%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v1.0"></a>
+<h2 class="h3">例子:WordCount v1.0</h2>
+<div class="section">
+<p>在深入细节之前,让我们先看一个Map/Reduce的应用示例,以便对它们的工作方式有一个初步的认识。</p>
+<p>
+<span class="codefrag">WordCount</span>是一个简单的应用,它可以计算出指定数据集中每一个单词出现的次数。</p>
+<p>这个应用适用于
+      <a href="quickstart.html#Standalone+Operation">单机模式</a>,
+      <a href="quickstart.html#SingleNodeSetup">伪分布式模式</a> 或
+      <a href="quickstart.html#Fully-Distributed+Operation">完全分布式模式</a> 
+      三种Hadoop安装方式。</p>
+<a name="N100E1"></a><a name="%E6%BA%90%E4%BB%A3%E7%A0%81"></a>
+<h3 class="h4">源代码</h3>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+            
+<th colspan="1" rowspan="1"></th>
+            <th colspan="1" rowspan="1">WordCount.java</th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">1.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">package org.myorg;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">3.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import java.io.IOException;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">4.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import java.util.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">5.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">6.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.fs.Path;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">7.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.conf.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">8.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.io.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">9.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.mapred.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">10.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.util.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">11.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">12.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">public class WordCount {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">13.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">14.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">
+                public static class Map extends MapReduceBase 
+                implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">15.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                private final static IntWritable one = new IntWritable(1);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">16.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private Text word = new Text();</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">17.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">18.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                public void map(LongWritable key, Text value, 
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">19.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">String line = value.toString();</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">20.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">StringTokenizer tokenizer = new StringTokenizer(line);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">21.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">while (tokenizer.hasMoreTokens()) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">22.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">word.set(tokenizer.nextToken());</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">23.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">output.collect(word, one);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">24.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">25.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">26.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">27.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">28.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">
+                public static class Reduce extends MapReduceBase implements 
+                Reducer&lt;Text, IntWritable, Text, IntWritable&gt; {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">29.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                public void reduce(Text key, Iterator&lt;IntWritable&gt; values,
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">30.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">int sum = 0;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">31.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">while (values.hasNext()) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">32.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">sum += values.next().get();</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">33.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">34.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">output.collect(key, new IntWritable(sum));</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">35.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">36.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">37.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">38.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">
+                public static void main(String[] args) throws Exception {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">39.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                JobConf conf = new JobConf(WordCount.class);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">40.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setJobName("wordcount");</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">41.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">42.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setOutputKeyClass(Text.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">43.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setOutputValueClass(IntWritable.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">44.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">45.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setMapperClass(Map.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">46.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setCombinerClass(Reduce.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">47.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setReducerClass(Reduce.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">48.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">49.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setInputFormat(TextInputFormat.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">50.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setOutputFormat(TextOutputFormat.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">51.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">52.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">FileInputFormat.setInputPaths(conf, new Path(args[0]));</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">53.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">FileOutputFormat.setOutputPath(conf, new Path(args[1]));</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">54.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">55.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">JobClient.runJob(conf);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">56.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">57.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">58.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">59.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+        
+</table>
+<a name="N10463"></a><a name="%E7%94%A8%E6%B3%95"></a>
+<h3 class="h4">用法</h3>
+<p>假设环境变量<span class="codefrag">HADOOP_HOME</span>对应安装时的根目录,<span class="codefrag">HADOOP_VERSION</span>对应Hadoop的当前安装版本,编译<span class="codefrag">WordCount.java</span>来创建jar包,可如下操作:</p>
+<p>
+          
+<span class="codefrag">$ mkdir wordcount_classes</span>
+<br>
+          
+<span class="codefrag">
+            $ javac -classpath ${HADOOP_HOME}/hadoop-${HADOOP_VERSION}-core.jar 
+              -d wordcount_classes WordCount.java
+          </span>
+<br>
+          
+<span class="codefrag">$ jar -cvf /usr/joe/wordcount.jar -C wordcount_classes/ .</span> 
+        
+</p>
+<p>假设:</p>
+<ul>
+          
+<li>
+            
+<span class="codefrag">/usr/joe/wordcount/input</span>  - 是HDFS中的输入路径
+          </li>
+          
+<li>
+            
+<span class="codefrag">/usr/joe/wordcount/output</span> - 是HDFS中的输出路径
+          </li>
+        
+</ul>
+<p>用示例文本文件做为输入:</p>
+<p>
+          
+<span class="codefrag">$ bin/hadoop dfs -ls /usr/joe/wordcount/input/</span>
+<br>
+          
+<span class="codefrag">/usr/joe/wordcount/input/file01</span>
+<br>
+          
+<span class="codefrag">/usr/joe/wordcount/input/file02</span>
+<br>
+          
+<br>
+          
+<span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file01</span>
+<br>
+          
+<span class="codefrag">Hello World Bye World</span>
+<br>
+          
+<br>
+          
+<span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</span>
+<br>
+          
+<span class="codefrag">Hello Hadoop Goodbye Hadoop</span>
+        
+</p>
+<p>运行应用程序:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              /usr/joe/wordcount/input /usr/joe/wordcount/output 
+          </span>
+        
+</p>
+<p>输出是:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </span>
+          
+<br>
+          
+<span class="codefrag">Bye    1</span>
+<br>
+          
+<span class="codefrag">Goodbye    1</span>
+<br>
+          
+<span class="codefrag">Hadoop    2</span>
+<br>
+          
+<span class="codefrag">Hello    2</span>
+<br>
+          
+<span class="codefrag">World    2</span>
+<br>
+        
+</p>
+<p> 应用程序能够使用<span class="codefrag">-files</span>选项来指定一个由逗号分隔的路径列表,这些路径所对应的文件会被放到task的当前工作目录中。使用选项<span class="codefrag">-libjars</span>可以向map和reduce的classpath中添加jar包。使用<span class="codefrag">-archives</span>选项程序可以传递档案文件做为参数,这些档案文件会被解压,并且在task的当前工作目录下会创建一个指向解压生成的目录的符号链接(以压缩包的名字命名)。
+        有关命令行选项的更多细节请参考
+        <a href="commands_manual.html">Commands manual</a>。</p>
+<p>使用<span class="codefrag">-libjars</span>和<span class="codefrag">-files</span>运行<span class="codefrag">wordcount</span>例子:<br>
+        
+<span class="codefrag"> hadoop jar hadoop-examples.jar wordcount -files cachefile.txt
+        -libjars mylib.jar input output </span>
+        
+</p>
+<a name="N10504"></a><a name="%E8%A7%A3%E9%87%8A"></a>
+<h3 class="h4">解释</h3>
+<p>
+<span class="codefrag">WordCount</span>应用程序非常直截了当。</p>
+<p>
+<span class="codefrag">Mapper</span>(14-26行)中的<span class="codefrag">map</span>方法(18-25行)通过指定的
+        <span class="codefrag">TextInputFormat</span>(49行)一次处理一行。然后,它通过<span class="codefrag">StringTokenizer</span>
+        以空格为分隔符将一行切分为若干tokens,之后,输出<span class="codefrag">&lt; &lt;word&gt;, 1&gt;</span>
+        形式的键值对。</p>
+<p>
+        对于示例中的第一个输入,map输出是:<br>
+          
+<span class="codefrag">&lt; Hello, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; World, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Bye, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; World, 1&gt;</span>
+<br>
+        
+</p>
+<p>
+          第二个输入,map输出是:<br>
+          
+<span class="codefrag">&lt; Hello, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hadoop, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Goodbye, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hadoop, 1&gt;</span>
+<br>
+        
+</p>
+<p>关于组成一个指定作业的map数目的确定,以及如何以更精细的方式去控制这些map,我们将在教程的后续部分学习到更多的内容。</p>
+<p>
+<span class="codefrag">WordCount</span>还指定了一个<span class="codefrag">combiner</span> (46行)。因此,每次map运行之后,会对输出按照<em>key</em>进行排序,然后把输出传递给本地的combiner(按照作业的配置与Reducer一样),进行本地聚合。</p>
+<p>
+         第一个map的输出是:<br>
+          
+<span class="codefrag">&lt; Bye, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hello, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; World, 2&gt;</span>
+<br>
+        
+</p>
+<p>
+          第二个map的输出是:<br>
+          
+<span class="codefrag">&lt; Goodbye, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hadoop, 2&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hello, 1&gt;</span>
+<br>
+        
+</p>
+<p>
+<span class="codefrag">Reducer</span>(28-36行)中的<span class="codefrag">reduce</span>方法(29-35行)
+        仅是将每个key(本例中就是单词)出现的次数求和。
+        </p>
+<p>
+          因此这个作业的输出就是:<br>
+          
+<span class="codefrag">&lt; Bye, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Goodbye, 1&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hadoop, 2&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; Hello, 2&gt;</span>
+<br>
+          
+<span class="codefrag">&lt; World, 2&gt;</span>
+<br>
+        
+</p>
+<p>代码中的<span class="codefrag">main</span>方法(38-57行)指定了作业的几个方面,
+        例如:通过命令行传递过来的输入/输出路径、key/value的类型、输入/输出的格式等等<span class="codefrag">JobConf</span>中的配置信息。随后程序调用了<span class="codefrag">JobClient.runJob</span>(55行)来提交作业并且监控它的执行。</p>
+<p>我们将在本教程的后续部分学习更多关于<span class="codefrag">JobConf</span>, <span class="codefrag">JobClient</span>,
+        <span class="codefrag">Tool</span>和其他接口及类(class)的内容。</p>
+</div>
+    
+    
+<a name="N105B5"></a><a name="Map%2FReduce+-+%E7%94%A8%E6%88%B7%E7%95%8C%E9%9D%A2"></a>
+<h2 class="h3">Map/Reduce - 用户界面</h2>
+<div class="section">
+<p>这部分文档为用户将会面临的Map/Reduce框架中的各个环节提供了适当的细节。这应该会帮助用户更细粒度地去实现、配置和调优作业。然而,请注意每个类/接口的javadoc才是最全面的参考文档;本文只是起到指南和教程的作用。
+      </p>
+<p>我们会先看看<span class="codefrag">Mapper</span>和<span class="codefrag">Reducer</span>接口。应用程序通常会通过提供<span class="codefrag">map</span>和<span class="codefrag">reduce</span>方法来实现它们。
+      </p>
+<p>然后,我们会讨论其他的核心接口,其中包括:
+      <span class="codefrag">JobConf</span>,<span class="codefrag">JobClient</span>,<span class="codefrag">Partitioner</span>, 
+      <span class="codefrag">OutputCollector</span>,<span class="codefrag">Reporter</span>, 
+      <span class="codefrag">InputFormat</span>,<span class="codefrag">OutputFormat</span>等等。</p>
+<p>最后,我们将通过讨论框架中一些有用的功能点(例如:<span class="codefrag">DistributedCache</span>, 
+      <span class="codefrag">IsolationRunner</span>等等)来收尾。</p>
+<a name="N105EE"></a><a name="%E6%A0%B8%E5%BF%83%E5%8A%9F%E8%83%BD%E6%8F%8F%E8%BF%B0"></a>
+<h3 class="h4">核心功能描述</h3>
+<p>应用程序通常会通过提供<span class="codefrag">map</span>和<span class="codefrag">reduce</span>来实现
+        <span class="codefrag">Mapper</span>和<span class="codefrag">Reducer</span>接口,它们组成作业的核心。</p>
+<a name="N10603"></a><a name="Mapper"></a>
+<h4>Mapper</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Mapper.html">
+          Mapper</a>将输入键值对(key/value pair)映射到一组中间格式的键值对集合。</p>
+<p>Map是一类将输入记录集转换为中间格式记录集的独立任务。
+          这种转换的中间格式记录集不需要与输入记录集的类型一致。一个给定的输入键值对可以映射成0个或多个输出键值对。</p>
+<p>Hadoop Map/Reduce框架为每一个<span class="codefrag">InputSplit</span>产生一个map任务,而每个<span class="codefrag">InputSplit</span>是由该作业的<span class="codefrag">InputFormat</span>产生的。</p>
+<p>概括地说,对<span class="codefrag">Mapper</span>的实现者需要重写
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConfigurable.html#configure(org.apache.hadoop.mapred.JobConf)">
+		  JobConfigurable.configure(JobConf)</a>方法,这个方法需要传递一个<span class="codefrag">JobConf</span>参数,目的是完成Mapper的初始化工作。然后,框架为这个任务的<span class="codefrag">InputSplit</span>中每个键值对调用一次
+	  <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Mapper.html#map(K1, V1, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)">
+		  map(WritableComparable, Writable, OutputCollector, Reporter)</a>操作。应用程序可以通过重写<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/Closeable.html#close()">Closeable.close()</a>方法来执行相应的清理工作。</p>
+<p>输出键值对不需要与输入键值对的类型一致。一个给定的输入键值对可以映射成0个或多个输出键值对。通过调用<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputCollector.html#collect(K, V)">
+          OutputCollector.collect(WritableComparable,Writable)</a>可以收集输出的键值对。</p>
+<p>应用程序可以使用<span class="codefrag">Reporter</span>报告进度,设定应用级别的状态消息,更新<span class="codefrag">Counters</span>(计数器),或者仅是表明自己运行正常。</p>
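+<p>Putting the lifecycle above together, here is a hedged sketch (not from the original
+          tutorial; the class name, the pattern field and the configuration key
+          <span class="codefrag">my.grep.pattern</span> are invented) of a Mapper that emits zero or
+          one output pair per input record:</p>
+<pre class="code">
+// configure() runs once per task, map() once per record, close() at the end.
+public static class GrepMap extends MapReduceBase
+    implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
+  private String pattern;                        // set once, in configure()
+
+  public void configure(JobConf job) {
+    pattern = job.get("my.grep.pattern", "");    // hypothetical config key
+  }
+
+  public void map(LongWritable key, Text value,
+                  OutputCollector&lt;Text, IntWritable&gt; output,
+                  Reporter reporter) throws IOException {
+    if (value.toString().contains(pattern)) {    // 0 or 1 outputs per input
+      output.collect(value, new IntWritable(1));
+    }
+    reporter.progress();                         // tell the framework the task is alive
+  }
+
+  public void close() throws IOException {       // per-task cleanup hook
+  }
+}
+</pre>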
+<p>框架随后会把与一个特定key关联的所有中间过程的值(value)分成组,然后把它们传给<span class="codefrag">Reducer</span>以产出最终的结果。用户可以通过
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass(java.lang.Class)">
+          JobConf.setOutputKeyComparatorClass(Class)</a>来指定具体负责分组的
+          <span class="codefrag">Comparator</span>。</p>
+<p>
+<span class="codefrag">Mapper</span>的输出被排序后,就被划分给每个<span class="codefrag">Reducer</span>。分块的总数目和一个作业的reduce任务的数目是一样的。用户可以通过实现自定义的          <span class="codefrag">Partitioner</span>来控制哪个key被分配给哪个 <span class="codefrag">Reducer</span>。</p>
+<p>用户可选择通过<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setCombinerClass(java.lang.Class)">
+          JobConf.setCombinerClass(Class)</a>指定一个<span class="codefrag">combiner</span>,它负责对中间过程的输出进行本地的聚集,这会有助于降低从<span class="codefrag">Mapper</span>到
+          <span class="codefrag">Reducer</span>数据传输量。
+          </p>
+<p>这些被排好序的中间过程的输出结果保存的格式是(key-len, key, value-len, value),应用程序可以通过<span class="codefrag">JobConf</span>控制对这些中间结果是否进行压缩以及怎么压缩,使用哪种<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/compress/CompressionCodec.html">
+          CompressionCodec</a>。
+          </p>
+<a name="N1067B"></a><a name="%E9%9C%80%E8%A6%81%E5%A4%9A%E5%B0%91%E4%B8%AAMap%EF%BC%9F"></a>
+<h5>需要多少个Map?</h5>
+<p>Map的数目通常是由输入数据的大小决定的,一般就是所有输入文件的总块(block)数。</p>
+<p>Map正常的并行规模大致是每个节点(node)大约10到100个map,对于CPU
+            消耗较小的map任务可以设到300个左右。由于每个任务初始化需要一定的时间,因此,比较合理的情况是map执行的时间至少超过1分钟。</p>
+<p>这样,如果你输入10TB的数据,每个块(block)的大小是128MB,你将需要大约82,000个map来完成任务,除非使用
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)">
+            setNumMapTasks(int)</a>(注意:这里仅仅是对框架进行了一个提示(hint),实际决定因素见<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)">这里</a>)将这个数值设置得更高。</p>
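+<p>Spelled out, the arithmetic behind that figure is: 10 TB = 10 &times; 1024 &times; 1024 MB
+            = 10,485,760 MB, and 10,485,760 MB / 128 MB per block = 81,920, i.e. roughly 82,000 maps.</p>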
+<a name="N10694"></a><a name="Reducer"></a>
+<h4>Reducer</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reducer.html">
+          Reducer</a>将与一个key关联的一组中间数值集归约(reduce)为一个更小的数值集。</p>
+<p>用户可以通过<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumReduceTasks(int)">
+          JobConf.setNumReduceTasks(int)</a>设定一个作业中reduce任务的数目。</p>
+<p>概括地说,对<span class="codefrag">Reducer</span>的实现者需要重写
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConfigurable.html#configure(org.apache.hadoop.mapred.JobConf)">
+          JobConfigurable.configure(JobConf)</a>方法,这个方法需要传递一个<span class="codefrag">JobConf</span>参数,目的是完成Reducer的初始化工作。然后,框架为成组的输入数据中的每个<span class="codefrag">&lt;key, (list of values)&gt;</span>对调用一次
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reducer.html#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)">
+          reduce(WritableComparable, Iterator, OutputCollector, Reporter)</a>方法。之后,应用程序可以通过重写<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/Closeable.html#close()">Closeable.close()</a>来执行相应的清理工作。</p>
+<p>
+<span class="codefrag">Reducer</span>有3个主要阶段:shuffle、sort和reduce。
+          </p>
+<a name="N106C4"></a><a name="Shuffle"></a>
+<h5>Shuffle</h5>
+<p>
+<span class="codefrag">Reducer</span>的输入就是Mapper已经排好序的输出。在这个阶段,框架通过HTTP为每个Reducer获得所有Mapper输出中与之相关的分块。</p>
+<a name="N106D0"></a><a name="Sort"></a>
+<h5>Sort</h5>
+<p>这个阶段,框架将按照key的值对<span class="codefrag">Reducer</span>的输入进行分组
+            (因为不同mapper的输出中可能会有相同的key)。</p>
+<p>Shuffle和Sort两个阶段是同时进行的;map的输出也是一边被取回一边被合并的。</p>
+<a name="N106DF"></a><a name="Secondary+Sort"></a>
+<h5>Secondary Sort</h5>
+<p>如果需要中间过程对key的分组规则和reduce前对key的分组规则不同,那么可以通过<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setOutputValueGroupingComparator(java.lang.Class)">
+              JobConf.setOutputValueGroupingComparator(Class)</a>来指定一个<span class="codefrag">Comparator</span>。再加上
+              <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setOutputKeyComparatorClass(java.lang.Class)">
+              JobConf.setOutputKeyComparatorClass(Class)</a>可用于控制中间过程的key如何被分组,所以结合两者可以实现<em>按值的二次排序</em>。
+              </p>
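+<p>A minimal wiring sketch for such a secondary sort follows; the two comparator
+              class names are hypothetical application classes, not Hadoop classes:</p>
+<pre class="code">
+JobConf conf = new JobConf(MySecondarySortJob.class);    // hypothetical job class
+// Full ordering of the intermediate keys, e.g. by (natural key, value part):
+conf.setOutputKeyComparatorClass(FullKeyComparator.class);
+// Coarser grouping used when feeding reduce(): compare only the natural key,
+// so all values of one natural key arrive in a single reduce() call, sorted:
+conf.setOutputValueGroupingComparator(NaturalKeyComparator.class);
+</pre>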
+<a name="N106F8"></a><a name="Reduce"></a>
+<h5>Reduce</h5>
+<p>在这个阶段,框架为已分组的输入数据中的每个
+          <span class="codefrag">&lt;key, (list of values)&gt;</span>对调用一次
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reducer.html#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)">
+          reduce(WritableComparable, Iterator, OutputCollector, Reporter)</a>方法。</p>
+<p>Reduce任务的输出通常是通过调用
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputCollector.html#collect(K, V)">
+            OutputCollector.collect(WritableComparable, Writable)</a>写入
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/fs/FileSystem.html">
+            文件系统</a>的。</p>
+<p>应用程序可以使用<span class="codefrag">Reporter</span>报告进度,设定应用程序级别的状态消息,更新<span class="codefrag">Counters</span>(计数器),或者仅是表明自己运行正常。</p>
+<p>
+<span class="codefrag">Reducer</span>的输出是<em>没有排序的</em>。</p>
+<a name="N10725"></a><a name="%E9%9C%80%E8%A6%81%E5%A4%9A%E5%B0%91%E4%B8%AAReduce%EF%BC%9F"></a>
+<h5>需要多少个Reduce?</h5>
+<p>Reduce的数目建议是<span class="codefrag">0.95</span>或<span class="codefrag">1.75</span>乘以
+            (&lt;<em>no. of nodes</em>&gt; * 
+            <span class="codefrag">mapred.tasktracker.reduce.tasks.maximum</span>)。
+            </p>
+<p>用0.95,所有reduce可以在maps一完成时就立刻启动,开始传输map的输出结果。用1.75,速度快的节点可以在完成第一轮reduce任务后,立即开始第二轮,这样可以得到比较好的负载均衡的效果。</p>
+<p>增加reduce的数目会增加整个框架的开销,但可以改善负载均衡,降低由于执行失败带来的负面影响。</p>
+<p>上述比例因子比整体数目稍小一些是为了给框架中的推测性任务(speculative-tasks)
+            或失败的任务预留一些reduce的资源。</p>
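+<p>As a worked example (the cluster size is invented): on 20 nodes with
+            <span class="codefrag">mapred.tasktracker.reduce.tasks.maximum</span> = 2, the factor
+            0.95 gives 0.95 &times; (20 &times; 2) = 38 reduces and the factor 1.75 gives
+            1.75 &times; 40 = 70, to be set via
+            <span class="codefrag">JobConf.setNumReduceTasks(int)</span>.</p>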
+<a name="N10744"></a><a name="%E6%97%A0Reducer"></a>
+<h5>无Reducer</h5>
+<p>如果没有归约要进行,那么设置reduce任务的数目为<em>零</em>是合法的。</p>
+<p>这种情况下,map任务的输出会直接被写入由
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">
+		    setOutputPath(Path)</a>指定的输出路径。框架在把它们写入<span class="codefrag">FileSystem</span>之前没有对它们进行排序。
+            </p>
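+<p>Configuring such a map-only job takes a single call; the sketch below is
+            illustrative and the job class name is invented:</p>
+<pre class="code">
+JobConf conf = new JobConf(MyMapOnlyJob.class);  // hypothetical job class
+conf.setNumReduceTasks(0);                       // zero reduces: map-only job
+// Map output now goes straight to the output path, unsorted:
+FileOutputFormat.setOutputPath(conf, new Path(args[1]));
+JobClient.runJob(conf);
+</pre>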
+<a name="N1075C"></a><a name="Partitioner"></a>
+<h4>Partitioner</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Partitioner.html">
+          Partitioner</a>用于划分键值空间(key space)。</p>
+<p>Partitioner负责控制map输出结果key的分割。Key(或者一个key子集)被用于产生分区,通常使用的是Hash函数。分区的数目与一个作业的reduce任务的数目是一样的。因此,它控制中间过程的key(也就是这条记录)应该被发送给<span class="codefrag">m</span>个reduce任务中的哪一个来进行reduce操作。
+	  </p>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/lib/HashPartitioner.html">
+          HashPartitioner</a>是默认的 <span class="codefrag">Partitioner</span>。  </p>
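+<p>A custom <span class="codefrag">Partitioner</span> is a small class; the routing rule
+          below is invented purely for illustration:</p>
+<pre class="code">
+public class FirstCharPartitioner implements Partitioner&lt;Text, IntWritable&gt; {
+  public void configure(JobConf job) { }          // no per-job setup needed
+
+  public int getPartition(Text key, IntWritable value, int numPartitions) {
+    String s = key.toString();
+    if (s.length() &gt; 0 &amp;&amp; s.charAt(0) == '#') {
+      return 0;                                   // route all '#'-keys to reducer 0
+    }
+    // non-negative hash modulo the number of reduce tasks:
+    return (s.hashCode() &amp; Integer.MAX_VALUE) % numPartitions;
+  }
+}
+</pre>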
+<a name="N10778"></a><a name="Reporter"></a>
+<h4>Reporter</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reporter.html">
+          Reporter</a>是用于Map/Reduce应用程序报告进度,设定应用级别的状态消息,
+          更新<span class="codefrag">Counters</span>(计数器)的机制。</p>
+<p>
+<span class="codefrag">Mapper</span>和<span class="codefrag">Reducer</span>的实现可以利用<span class="codefrag">Reporter</span>
+          来报告进度,或者仅是表明自己运行正常。在那种应用程序需要花很长时间处理个别键值对的场景中,这种机制是很关键的,因为框架可能会以为这个任务超时了,从而将它强行杀死。另一个避免这种情况发生的方式是,将配置参数<span class="codefrag">mapred.task.timeout</span>设置为一个足够高的值(或者干脆设置为零,则没有超时限制了)。
+          </p>
+<p>应用程序可以用<span class="codefrag">Reporter</span>来更新<span class="codefrag">Counter</span>(计数器)。
+          </p>
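+<p>下面的示意片段(仅为演示)展示了在一个耗时较长的<span class="codefrag">map</span>实现中如何使用<span class="codefrag">Reporter</span>:</p>
+<p>
+<span class="codefrag">public void map(LongWritable key, Text value,</span>
+<br>
+&nbsp;&nbsp;&nbsp;&nbsp;<span class="codefrag">OutputCollector&lt;Text, IntWritable&gt; output, Reporter reporter) throws IOException {</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">// ……耗时较长的处理逻辑……</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">reporter.progress();&nbsp;&nbsp;// 仅表明任务仍在正常运行</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">reporter.setStatus("processing record at offset " + key);&nbsp;&nbsp;// 应用级状态消息</span>
+<br>
+<span class="codefrag">}</span>
+</p>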
+<a name="N1079F"></a><a name="OutputCollector"></a>
+<h4>OutputCollector</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputCollector.html">
+          OutputCollector</a>是一个Map/Reduce框架提供的用于收集
+          <span class="codefrag">Mapper</span>或<span class="codefrag">Reducer</span>输出数据的通用机制
+          (包括中间输出结果和作业的输出结果)。</p>
+<p>Hadoop Map/Reduce框架附带了一个包含许多实用型的mapper、reducer和partitioner
+        的<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/lib/package-summary.html">类库</a>。</p>
+<a name="N107BA"></a><a name="%E4%BD%9C%E4%B8%9A%E9%85%8D%E7%BD%AE"></a>
+<h3 class="h4">作业配置</h3>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html">
+        JobConf</a>代表一个Map/Reduce作业的配置。</p>
+<p>
+<span class="codefrag">JobConf</span>是用户向Hadoop框架描述一个Map/Reduce作业如何执行的主要接口。框架会按照<span class="codefrag">JobConf</span>描述的信息忠实地去尝试完成这个作业,然而:</p>
+<ul>
+          
+<li>
+            一些参数可能会被管理者标记为<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#FinalParams">
+            final</a>,这意味它们不能被更改。
+          </li>
+          
+<li>
+          一些作业的参数可以被直截了当地进行设置(例如:
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumReduceTasks(int)">
+            setNumReduceTasks(int)</a>),而另一些参数则与框架或者作业的其他参数之间微妙地相互影响,并且设置起来比较复杂(例如:<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setNumMapTasks(int)">
+            setNumMapTasks(int)</a>)。
+          </li>
+        
+</ul>
+<p>通常,<span class="codefrag">JobConf</span>会指明<span class="codefrag">Mapper</span>、Combiner(如果有的话)、
+        <span class="codefrag">Partitioner</span>、<span class="codefrag">Reducer</span>、<span class="codefrag">InputFormat</span>和 
+        <span class="codefrag">OutputFormat</span>的具体实现。<span class="codefrag">JobConf</span>还能指定一组输入文件
+        (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#setInputPaths(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path[])">setInputPaths(JobConf, Path...)</a>
+        /<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#addInputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">addInputPath(JobConf, Path)</a>)
+        和(<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#setInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)">setInputPaths(JobConf, String)</a>
+        /<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html#addInputPath(org.apache.hadoop.mapred.JobConf,%20java.lang.String)">addInputPaths(JobConf, String)</a>)
+        以及输出文件应该写在哪儿
+        (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)">setOutputPath(Path)</a>)。</p>
+<p>
+<span class="codefrag">JobConf</span>可选择地对作业设置一些高级选项,例如:设置<span class="codefrag">Comparator</span>;
+        放到<span class="codefrag">DistributedCache</span>上的文件;中间结果或者作业输出结果是否需要压缩以及怎么压缩;
+        利用用户提供的脚本(<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapDebugScript(java.lang.String)">setMapDebugScript(String)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setReduceDebugScript(java.lang.String)">setReduceDebugScript(String)</a>)     
+        进行调试;作业是否允许<em>预防性(speculative)</em>任务的执行
+        (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapSpeculativeExecution(boolean)">setMapSpeculativeExecution(boolean)</a>)/(<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setReduceSpeculativeExecution(boolean)">setReduceSpeculativeExecution(boolean)</a>)
+        ;每个任务最大的尝试次数
+        (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxMapAttempts(int)">setMaxMapAttempts(int)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxReduceAttempts(int)">setMaxReduceAttempts(int)</a>)
+        ;一个作业能容忍的任务失败的百分比
+        (<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxMapTaskFailuresPercent(int)">setMaxMapTaskFailuresPercent(int)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMaxReduceTaskFailuresPercent(int)">setMaxReduceTaskFailuresPercent(int)</a>) 
+        ;等等。</p>
+<p>当然,用户能使用
+        <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#set(java.lang.String, java.lang.String)">set(String, String)</a>/<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/conf/Configuration.html#get(java.lang.String, java.lang.String)">get(String, String)</a>
+        来设置或者取得应用程序需要的任意参数。然而,<span class="codefrag">DistributedCache</span>的使用是面向大规模只读数据的。</p>
+<a name="N1084C"></a><a name="%E4%BB%BB%E5%8A%A1%E7%9A%84%E6%89%A7%E8%A1%8C%E5%92%8C%E7%8E%AF%E5%A2%83"></a>
+<h3 class="h4">任务的执行和环境</h3>
+<p>
+<span class="codefrag">TaskTracker</span>是在一个单独的jvm上以子进程的形式执行
+        <span class="codefrag">Mapper</span>/<span class="codefrag">Reducer</span>任务(Task)的。
+        </p>
+<p>子任务会继承父<span class="codefrag">TaskTracker</span>的环境。用户可以通过JobConf中的
+        <span class="codefrag">mapred.child.java.opts</span>配置参数来设定子jvm上的附加选项,例如:
+        通过<span class="codefrag">-Djava.library.path=&lt;&gt;</span> 为运行时链接器指定一个用于搜索共享库的非标准路径,等等。如果<span class="codefrag">mapred.child.java.opts</span>包含一个符号<em>@taskid@</em>,
+        它会被替换成map/reduce的taskid的值。</p>
+<p>下面是一个包含多个参数和替换的例子,其中包括:记录jvm GC日志;
+        JVM JMX代理程序以无密码的方式启动,这样它就能连接到jconsole上,从而可以查看子进程的内存和线程,得到线程的dump;还把子jvm的最大堆尺寸设置为512MB,
+        并为子jvm的<span class="codefrag">java.library.path</span>添加了一个附加路径。</p>
+<p>
+          
+<span class="codefrag">&lt;property&gt;</span>
+<br>
+          &nbsp;&nbsp;<span class="codefrag">&lt;name&gt;mapred.child.java.opts&lt;/name&gt;</span>
+<br>
+          &nbsp;&nbsp;<span class="codefrag">&lt;value&gt;</span>
+<br>
+          &nbsp;&nbsp;&nbsp;&nbsp;<span class="codefrag">
+                    -Xmx512M -Djava.library.path=/home/mycompany/lib
+                    -verbose:gc -Xloggc:/tmp/@taskid@.gc</span>
+<br>
+          &nbsp;&nbsp;&nbsp;&nbsp;<span class="codefrag">
+                    -Dcom.sun.management.jmxremote.authenticate=false 
+                    -Dcom.sun.management.jmxremote.ssl=false</span>
+<br>
+          &nbsp;&nbsp;<span class="codefrag">&lt;/value&gt;</span>
+<br>
+          
+<span class="codefrag">&lt;/property&gt;</span>
+        
+</p>
+<p>用户或管理员也可以使用<span class="codefrag">mapred.child.ulimit</span>设定所运行子任务的最大虚拟内存。<span class="codefrag">mapred.child.ulimit</span>的值以千字节(KB)为单位,并且必须大于或等于通过-Xmx传给JavaVM的最大堆内存值,否则VM会无法启动。</p>
+<p>注意:<span class="codefrag">mapred.child.java.opts</span>只用于设置task tracker启动的子任务。为守护进程设置内存选项请查看
+        <a href="cluster_setup.html#%E9%85%8D%E7%BD%AEHadoop%E5%AE%88%E6%8A%A4%E8%BF%9B%E7%A8%8B%E7%9A%84%E8%BF%90%E8%A1%8C%E7%8E%AF%E5%A2%83">
+        cluster_setup.html </a>
+</p>
+<p>
+<span class="codefrag"> ${mapred.local.dir}/taskTracker/</span>是task tracker的本地目录,
+        用于创建本地缓存和job。它可以指定多个目录(跨越多个磁盘),文件会半随机地保存到其中某个目录下。当job启动时,task tracker根据配置文件创建本地job目录,目录结构如下所示:</p>
+<ul>
+	
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/archive/</span> :分布式缓存。这个目录保存本地的分布式缓存。因此本地分布式缓存是在所有task和job间共享的。</li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/</span> :
+        本地job目录。
+        <ul>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/work/</span>:
+        job指定的共享目录。各个任务可以使用这个空间做为暂存空间,用于它们之间共享文件。这个目录通过<span class="codefrag">job.local.dir </span>参数暴露给用户。这个路径可以通过API <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#getJobLocalDir()">
+        JobConf.getJobLocalDir()</a>来访问。它也可以作为系统属性获得。因此,用户(比如运行streaming的程序)可以调用<span class="codefrag">System.getProperty("job.local.dir")</span>获得该目录。
+        </li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/jars/</span>:
+        存放jar包的路径,用于存放作业的jar文件和展开的jar。<span class="codefrag">job.jar</span>是应用程序的jar文件,它会被自动分发到各台机器,在task启动前会被自动展开。使用api
+        <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#getJar()">
+        JobConf.getJar() </a>函数可以得到job.jar的位置。使用JobConf.getJar().getParent()可以访问存放展开的jar包的目录。
+        </li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/job.xml</span>:
+        一个job.xml文件,本地的通用的作业配置文件。
+        </li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid</span>:
+        每个任务有一个目录<span class="codefrag">task-id</span>,它里面有如下的目录结构:
+	<ul>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/job.xml</span>:
+       一个job.xml文件,本地化的任务作业配置文件。任务本地化是指为该task设定特定的属性值。这些值会在下面具体说明。
+	</li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/output</span>
+        一个存放中间过程输出文件的目录。它保存了由框架产生的临时map/reduce数据,比如map的输出文件等。</li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/work</span>:
+        task的当前工作目录。</li>
+        
+<li>
+<span class="codefrag">${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/work/tmp</span>:
+        task的临时目录。(用户可以设定属性<span class="codefrag">mapred.child.tmp</span>
+        来为map和reduce task设定临时目录。缺省值是<span class="codefrag">./tmp</span>。如果这个值不是绝对路径,
+        它会把task的工作路径加到该路径前面作为task的临时文件路径。如果这个值是绝对路径则直接使用这个值。
+        如果指定的目录不存在,会自动创建该目录。之后,按照选项
+        <span class="codefrag">-Djava.io.tmpdir='临时文件的绝对路径'</span>执行java子任务。
+        pipes和streaming的临时文件路径是通过环境变量<span class="codefrag">TMPDIR='the absolute path of the tmp dir'</span>设定的)。
+        只有当<span class="codefrag">mapred.child.tmp</span>的值为<span class="codefrag">./tmp</span>时,该目录才会被创建。</li>
+        
+</ul>
+        
+</li>
+        
+</ul>
+        
+</li>
+        
+</ul>
+<p>下面的属性是为每个task执行时使用的本地参数,它们保存在本地化的任务作业配置文件里:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+<th colspan="1" rowspan="1">名称</th><th colspan="1" rowspan="1">类型</th><th colspan="1" rowspan="1">描述</th>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.job.id</td><td colspan="1" rowspan="1">String</td><td colspan="1" rowspan="1">job id</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.jar</td><td colspan="1" rowspan="1">String</td>
+              <td colspan="1" rowspan="1">job目录下job.jar的位置</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">job.local.dir</td><td colspan="1" rowspan="1"> String</td>
+              <td colspan="1" rowspan="1">job指定的共享存储空间</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.tip.id</td><td colspan="1" rowspan="1"> String</td>
+              <td colspan="1" rowspan="1"> task id</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.task.id</td><td colspan="1" rowspan="1"> String</td>
+              <td colspan="1" rowspan="1"> task尝试id</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.task.is.map</td><td colspan="1" rowspan="1"> boolean </td>
+              <td colspan="1" rowspan="1">是否是map task</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.task.partition</td><td colspan="1" rowspan="1"> int </td>
+              <td colspan="1" rowspan="1">task在job中的id</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">map.input.file</td><td colspan="1" rowspan="1"> String</td>
+              <td colspan="1" rowspan="1"> map读取的文件名</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">map.input.start</td><td colspan="1" rowspan="1"> long</td>
+              <td colspan="1" rowspan="1"> map输入的数据块的起始位置偏移</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">map.input.length </td><td colspan="1" rowspan="1">long </td>
+              <td colspan="1" rowspan="1">map输入的数据块的字节数</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">mapred.work.output.dir</td><td colspan="1" rowspan="1"> String </td>
+              <td colspan="1" rowspan="1">task临时输出目录</td>
+</tr>
+        
+</table>
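+<p>下面的示意片段(仅为演示)展示了如何在<span class="codefrag">Mapper</span>/<span class="codefrag">Reducer</span>的<span class="codefrag">configure</span>方法中读取上表中的本地化参数:</p>
+<p>
+<span class="codefrag">public void configure(JobConf job) {</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">String taskId = job.get("mapred.task.id");&nbsp;&nbsp;// 当前任务尝试的id</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">boolean isMap = job.getBoolean("mapred.task.is.map", true);</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">String inputFile = job.get("map.input.file");&nbsp;&nbsp;// 仅对map task有意义</span>
+<br>
+<span class="codefrag">}</span>
+</p>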
+<p>task的标准输出和标准错误输出流会被读到TaskTracker中,并且记录到
+        <span class="codefrag">${HADOOP_LOG_DIR}/userlogs</span>目录下。
+</p>
+<p>
+<a href="#DistributedCache">DistributedCache</a>
+        可用于map或reduce task中分发jar包和本地库。子jvm总是把
+        <em>当前工作目录</em> 加到
+        <span class="codefrag">java.library.path</span> 和 <span class="codefrag">LD_LIBRARY_PATH</span>。
+        因此,可以通过
+        <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#loadLibrary(java.lang.String)">
+        System.loadLibrary</a>或 
+        <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#load(java.lang.String)">
+        System.load</a>装载缓存的库。有关使用分布式缓存加载共享库的细节请参考
+        <a href="native_libraries.html#%E4%BD%BF%E7%94%A8DistributedCache+%E5%8A%A0%E8%BD%BD%E6%9C%AC%E5%9C%B0%E5%BA%93">
+        native_libraries.html</a>
+</p>
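+<p>下面是一个装载已缓存本地库的示意片段(其中库名<span class="codefrag">mylib</span>只是假设的例子,对应诸如<span class="codefrag">libmylib.so</span>的文件):</p>
+<p>
+<span class="codefrag">static {</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">// 子jvm的当前工作目录已在java.library.path中,可直接按库名装载</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">System.loadLibrary("mylib");</span>
+<br>
+<span class="codefrag">}</span>
+</p>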
+<a name="N109E3"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8F%90%E4%BA%A4%E4%B8%8E%E7%9B%91%E6%8E%A7"></a>
+<h3 class="h4">作业的提交与监控</h3>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobClient.html">
+        JobClient</a>是用户提交的作业与<span class="codefrag">JobTracker</span>交互的主要接口。
+        </p>
+<p>
+<span class="codefrag">JobClient</span> 提供提交作业,追踪进程,访问子任务的日志记录,获得Map/Reduce集群状态信息等功能。
+        </p>
+<p>作业提交过程包括: </p>
+<ol>
+          
+<li>检查作业的输入和输出规范。</li>
+          
+<li>为作业计算<span class="codefrag">InputSplit</span>值。</li>
+          
+<li>
+           如果需要的话,为作业的<span class="codefrag">DistributedCache</span>建立必需的统计信息。
+          </li>
+          
+<li>
+            拷贝作业的jar包和配置文件到<span class="codefrag">FileSystem</span>上的Map/Reduce系统目录下。
+          </li>
+          
+<li>
+            提交作业到<span class="codefrag">JobTracker</span>并且监控它的状态。
+          </li>
+        
+</ol>
+<p>作业的历史文件记录到指定目录的"_logs/history/"子目录下。这个指定目录由<span class="codefrag">hadoop.job.history.user.location</span>设定,默认是作业输出的目录。因此默认情况下,文件会存放在mapred.output.dir/_logs/history目录下。用户可以设置<span class="codefrag">hadoop.job.history.user.location</span>为<span class="codefrag">none</span>来停止日志记录。
+        </p>
+<p> 用户使用下面的命令可以看到在指定目录下的历史日志记录的摘要。
+        <br>
+        
+<span class="codefrag">$ bin/hadoop job -history output-dir</span>
+<br> 
+        这个命令会打印出作业的细节,以及失败的和被杀死的任务细节。<br>
+        要查看有关作业的更多细节,例如成功的任务、针对每个任务所做的所有任务尝试(task attempt)等,可以使用下面的命令
+        <br>
+       
+<span class="codefrag">$ bin/hadoop job -history all output-dir</span>
+<br>
+</p>
+<p>用户可以使用 
+        <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputLogFilter.html">OutputLogFilter</a>
+        从输出目录列表中筛选日志文件。</p>
+<p>一般情况,用户利用<span class="codefrag">JobConf</span>创建应用程序并配置作业属性,
+        然后用
+        <span class="codefrag">JobClient</span>提交作业并监控其进度。</p>
+<a name="N10A44"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E6%8E%A7%E5%88%B6"></a>
+<h4>作业的控制</h4>
+<p>有时候,用一个单独的Map/Reduce作业并不能完成一个复杂的任务,用户也许需要链接多个Map/Reduce作业才行。这是容易实现的:作业的输出通常保存在分布式文件系统上,因此可以把前一个作业的输出作为下一个作业的输入,实现串联。
+          </p>
+<p>然而,这也意味着,确保每个作业完成(成功或失败)的责任就直接落在了客户端身上。在这种情况下,可以用的控制作业的选项有:
+          </p>
+<ul>
+            
+<li>
+              
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobClient.html#runJob(org.apache.hadoop.mapred.JobConf)">
+              runJob(JobConf)</a>:提交作业,仅当作业完成时返回。
+            </li>
+            
+<li>
+              
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobClient.html#submitJob(org.apache.hadoop.mapred.JobConf)">
+              submitJob(JobConf)</a>:只提交作业,之后需要你轮询它返回的
+              <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/RunningJob.html">
+              RunningJob</a>句柄的状态,并根据情况做调度决策(可参见本列表之后的示意代码)。
+            </li>
+            
+<li>
+              
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setJobEndNotificationURI(java.lang.String)">
+              JobConf.setJobEndNotificationURI(String)</a>:设置一个作业完成通知,可避免轮询。
+           
+            </li>
+          
+</ul>
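+<p>下面是一个用<span class="codefrag">submitJob</span>提交作业并轮询其状态的示意片段(仅为演示,异常处理从略):</p>
+<p>
+<span class="codefrag">JobClient client = new JobClient(conf);</span>
+<br>
+<span class="codefrag">RunningJob job = client.submitJob(conf);</span>
+<br>
+<span class="codefrag">while (!job.isComplete()) {</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">Thread.sleep(5000);&nbsp;&nbsp;// 每5秒查询一次状态</span>
+<br>
+<span class="codefrag">}</span>
+<br>
+<span class="codefrag">if (job.isSuccessful()) {</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">// 在这里提交依赖本作业输出的下一个作业</span>
+<br>
+<span class="codefrag">}</span>
+</p>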
+<a name="N10A6E"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%85%A5"></a>
+<h3 class="h4">作业的输入</h3>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/InputFormat.html">
+        InputFormat</a> 为Map/Reduce作业描述输入的细节规范。
+        </p>
+<p>Map/Reduce框架根据作业的<span class="codefrag">InputFormat</span>来: 
+        </p>
+<ol>
+          
+<li>检查作业输入的有效性。</li>
+          
+<li>
+            把输入文件切分成多个逻辑<span class="codefrag">InputSplit</span>实例,
+            并把每一实例分别分发给一个
+            <span class="codefrag">Mapper</span>。
+          </li>
+          
+<li>
+            提供<span class="codefrag">RecordReader</span>的实现,这个RecordReader从逻辑<span class="codefrag">InputSplit</span>中获得输入记录,
+		这些记录将由<span class="codefrag">Mapper</span>处理。 
+          </li>
+        
+</ol>
+<p>基于文件的<span class="codefrag">InputFormat</span>实现(通常是
+	<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileInputFormat.html">
+        FileInputFormat</a>的子类)
+	默认行为是按照输入文件的字节大小,把输入数据切分成逻辑分块(<em>logical</em> 
+        <span class="codefrag">InputSplit</span> )。	
+        其中输入文件所在的<span class="codefrag">FileSystem</span>的数据块尺寸是分块大小的上限。下限可以设置<span class="codefrag">mapred.min.split.size</span>
+	的值。</p>
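+<p>例如,下面的示意片段把逻辑分块大小的下限设为64MB(也可以直接在配置文件中设置<span class="codefrag">mapred.min.split.size</span>):</p>
+<p>
+<span class="codefrag">conf.set("mapred.min.split.size", String.valueOf(64 * 1024 * 1024));</span>
+</p>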
+<p>显然,对很多应用程序来说,单纯按照输入文件大小进行逻辑切分是不够的,因为还必须考虑记录的边界。
+        在这种情况下,应用程序需要实现一个<span class="codefrag">RecordReader</span>来处理记录的边界,并为每个任务提供一个逻辑分块的面向记录的视图。
+        </p>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/TextInputFormat.html">
+        TextInputFormat</a> 是默认的<span class="codefrag">InputFormat</span>。</p>
+<p>如果一个作业的<span class="codefrag">Inputformat</span>是<span class="codefrag">TextInputFormat</span>,
+        并且框架检测到输入文件的后缀是<em>.gz</em>或<em>.lzo</em>,就会使用对应的<span class="codefrag">CompressionCodec</span>自动解压缩这些文件。
+        但是需要注意,上述带后缀的压缩文件不会被切分,并且整个压缩文件会分给一个mapper来处理。
+        </p>
+<a name="N10AD2"></a><a name="InputSplit"></a>
+<h4>InputSplit</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/InputSplit.html">
+          InputSplit</a> 是一个单独的<span class="codefrag">Mapper</span>要处理的数据块。</p>
+<p>通常,<span class="codefrag">InputSplit</span>呈现的是面向字节的输入视图,再由<span class="codefrag">RecordReader</span>处理并转化成面向记录的视图。
+          </p>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileSplit.html">
+			  FileSplit</a> 是默认的<span class="codefrag">InputSplit</span>。 它把
+          <span class="codefrag">map.input.file</span>设定为该逻辑分块所对应的输入文件的路径。
+          </p>
+<a name="N10AF7"></a><a name="RecordReader"></a>
+<h4>RecordReader</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/RecordReader.html">
+          RecordReader</a> 从<span class="codefrag">InputSplit</span>读入<span class="codefrag">&lt;key, value&gt;</span>对。 
+          </p>
+<p>通常,<span class="codefrag">RecordReader</span>把<span class="codefrag">InputSplit</span>
+          提供的面向字节的输入视图,转化成供<span class="codefrag">Mapper</span>处理的面向记录的视图。
+          因此,<span class="codefrag">RecordReader</span>负责处理记录的边界,并把数据表示成key/value对的形式。
+          </p>
+<a name="N10B1A"></a><a name="%E4%BD%9C%E4%B8%9A%E7%9A%84%E8%BE%93%E5%87%BA"></a>
+<h3 class="h4">作业的输出</h3>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/OutputFormat.html">
+        OutputFormat</a> 描述Map/Reduce作业的输出规范。
+        </p>
+<p>Map/Reduce框架根据作业的<span class="codefrag">OutputFormat</span>来:
+        </p>
+<ol>
+          
+<li>
+            检验作业的输出,例如检查输出路径是否已经存在。
+          </li>
+          
+<li>
+            提供一个<span class="codefrag">RecordWriter</span>的实现,用来输出作业结果。
+            输出文件保存在<span class="codefrag">FileSystem</span>上。
+          </li>
+        
+</ol>
+<p>
+<span class="codefrag">TextOutputFormat</span>是默认的
+        <span class="codefrag">OutputFormat</span>。</p>
+<a name="N10B43"></a><a name="%E4%BB%BB%E5%8A%A1%E7%9A%84Side-Effect+File"></a>
+<h4>任务的Side-Effect File</h4>
+<p>在一些应用程序中,子任务需要产生一些side-file,这些文件与作业实际输出结果的文件不同。
+          </p>
+<p>在这种情况下,同一个<span class="codefrag">Mapper</span>或者<span class="codefrag">Reducer</span>的两个实例(比如预防性任务)同时打开或者写
+	  <span class="codefrag">FileSystem</span>上的同一文件就会产生冲突。因此应用程序在写文件的时候需要为每次任务尝试(不仅仅是每次任务,每个任务可以尝试执行很多次)选取一个独一无二的文件名(使用attemptid,例如<span class="codefrag">task_200709221812_0001_m_000000_0</span>)。 
+          </p>
+<p>为了避免冲突,Map/Reduce框架为每次尝试执行任务都建立和维护一个特殊的
+          <span class="codefrag">${mapred.output.dir}/_temporary/_${taskid}</span>子目录,这个目录位于本次尝试执行任务输出结果所在的<span class="codefrag">FileSystem</span>上,可以通过
+          <span class="codefrag">${mapred.work.output.dir}</span>来访问这个子目录。
+          对于成功完成的任务尝试,只有<span class="codefrag">${mapred.output.dir}/_temporary/_${taskid}</span>下的文件会<em>移动</em>到<span class="codefrag">${mapred.output.dir}</span>。当然,框架会丢弃那些失败的任务尝试的子目录。这种处理过程对于应用程序来说是完全透明的。</p>
+<p>在任务执行期间,应用程序在写文件时可以利用这个特性,比如
+	  通过<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)">
+          FileOutputFormat.getWorkOutputPath()</a>获得<span class="codefrag">${mapred.work.output.dir}</span>目录,
+	  并在其下创建任意任务执行时所需的side-file,框架在任务尝试成功时会马上移动这些文件,因此不需要在程序内为每次任务尝试选取一个独一无二的名字。
+          </p>
+<p>注意:在每次任务尝试执行期间,<span class="codefrag">${mapred.work.output.dir}</span> 的值实际上是
+          <span class="codefrag">${mapred.output.dir}/_temporary/_${taskid}</span>,这个值是由Map/Reduce框架创建的。
+          所以使用这个特性的方法是,在<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)">
+          FileOutputFormat.getWorkOutputPath() </a>
+	  路径下创建side-file即可。
+	  </p>
+<p>对于只使用map不使用reduce的作业,这个结论也成立。这种情况下,map的输出结果直接生成到HDFS上。
+           </p>
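+<p>下面是一个在任务中创建side-file的示意片段(其中<span class="codefrag">job</span>为传入<span class="codefrag">configure</span>的JobConf对象,文件名<span class="codefrag">side-data</span>仅为假设):</p>
+<p>
+<span class="codefrag">Path workDir = FileOutputFormat.getWorkOutputPath(job);&nbsp;&nbsp;// 即${mapred.work.output.dir}</span>
+<br>
+<span class="codefrag">FileSystem fs = FileSystem.get(job);</span>
+<br>
+<span class="codefrag">FSDataOutputStream side = fs.create(new Path(workDir, "side-data"));</span>
+<br>
+<span class="codefrag">side.writeBytes("...");&nbsp;&nbsp;// 任务尝试成功后,该文件会被移到${mapred.output.dir}</span>
+<br>
+<span class="codefrag">side.close();</span>
+</p>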
+<a name="N10B8B"></a><a name="RecordWriter"></a>
+<h4>RecordWriter</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/RecordWriter.html">
+          RecordWriter</a> 生成<span class="codefrag">&lt;key, value&gt;</span> 
+          对到输出文件。</p>
+<p>RecordWriter的实现把作业的输出结果写到
+          <span class="codefrag">FileSystem</span>。</p>
+<a name="N10BA2"></a><a name="%E5%85%B6%E4%BB%96%E6%9C%89%E7%94%A8%E7%9A%84%E7%89%B9%E6%80%A7"></a>
+<h3 class="h4">其他有用的特性</h3>
+<a name="N10BA8"></a><a name="Counters"></a>
+<h4>Counters</h4>
+<p>
+<span class="codefrag">Counters</span> 是多个由Map/Reduce框架或者应用程序定义的全局计数器。
+          每一个<span class="codefrag">Counter</span>可以是任何一种 
+          <span class="codefrag">Enum</span>类型。同一特定<span class="codefrag">Enum</span>类型的Counter可以汇集到一个组,其类型为<span class="codefrag">Counters.Group</span>。</p>
+<p>应用程序可以定义任意(Enum类型)的<span class="codefrag">Counters</span>并且可以通过 <span class="codefrag">map</span> 或者 
+          <span class="codefrag">reduce</span>方法中的
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reporter.html#incrCounter(java.lang.Enum, long)">
+          Reporter.incrCounter(Enum, long)</a>或者 
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/Reporter.html#incrCounter(java.lang.String, java.lang.String, long amount)">
+          Reporter.incrCounter(String, String, long)</a>
+          更新。之后框架会汇总这些全局counters。 
+          </p>
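+<p>下面的示意片段(与后文WordCount v2.0中的用法类似;其中枚举名和判断条件均为假设)展示了如何更新一个应用程序自定义的计数器:</p>
+<p>
+<span class="codefrag">static enum MyCounters { MALFORMED_RECORDS }</span>
+<br>
+<span class="codefrag">// 在map或reduce方法内部:</span>
+<br>
+<span class="codefrag">if (recordIsMalformed) {&nbsp;&nbsp;// 假设的判断条件</span>
+<br>
+&nbsp;&nbsp;<span class="codefrag">reporter.incrCounter(MyCounters.MALFORMED_RECORDS, 1);</span>
+<br>
+<span class="codefrag">}</span>
+</p>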
+<a name="N10BD4"></a><a name="DistributedCache"></a>
+<h4>DistributedCache</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html">
+          DistributedCache</a> 可将具体应用相关的、大尺寸的、只读的文件有效地分布放置。
+          </p>
+<p>
+<span class="codefrag">DistributedCache</span> 是Map/Reduce框架提供的功能,能够缓存应用程序所需的文件
+		(包括文本,档案文件,jar文件等)。
+          </p>
+<p>应用程序在<span class="codefrag">JobConf</span>中通过url(hdfs://)指定需要被缓存的文件。
+	  <span class="codefrag">DistributedCache</span>假定由hdfs://格式url指定的文件已经在 
+          <span class="codefrag">FileSystem</span>上了。</p>
+<p>Map/Reduce框架在作业的所有任务执行之前,会把必要的文件拷贝到slave节点上。
+          它运行高效,是因为每个作业的文件只拷贝一次,并且框架能够缓存归档文件,这些归档文件会在slave节点上被解档。
+          </p>
+<p>
+<span class="codefrag">DistributedCache</span> 根据缓存文档修改的时间戳进行追踪。
+	  在作业执行期间,当前应用程序或者外部程序不能修改缓存文件。 
+          </p>
+<p>
+<span class="codefrag">distributedCache</span>可以分发简单的只读数据或文本文件,也可以分发复杂类型的文件例如归档文件和jar文件。归档文件(zip,tar,tgz和tar.gz文件)在slave节点上会被<em>解档(un-archived)</em>。
+          这些文件可以设置<em>执行权限</em>。</p>
+<p>用户可以通过设置<span class="codefrag">mapred.cache.{files|archives}</span>来分发文件。
+          如果要分发多个文件,可以使用逗号分隔文件所在路径。也可以利用API来设置该属性:
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addCacheFile(java.net.URI,%20org.apache.hadoop.conf.Configuration)">
+          DistributedCache.addCacheFile(URI,conf)</a>/
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addCacheArchive(java.net.URI,%20org.apache.hadoop.conf.Configuration)">
+          DistributedCache.addCacheArchive(URI,conf)</a> 和
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#setCacheFiles(java.net.URI[],%20org.apache.hadoop.conf.Configuration)">
+          DistributedCache.setCacheFiles(URIs,conf)</a>/
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#setCacheArchives(java.net.URI[],%20org.apache.hadoop.conf.Configuration)">
+          DistributedCache.setCacheArchives(URIs,conf)</a>
+          其中URI的形式是
+          <span class="codefrag">hdfs://host:port/absolute-path#link-name</span>。
+          在Streaming程序中,可以通过命令行选项
+          <span class="codefrag">-cacheFile/-cacheArchive</span>
+          分发文件。</p>
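+<p>下面是一个使用上述API的示意片段(其中URI路径仅为假设的例子):</p>
+<p>
+<span class="codefrag">// 作业提交时登记缓存文件(new URI(...)可能抛出URISyntaxException)</span>
+<br>
+<span class="codefrag">DistributedCache.addCacheFile(new URI("hdfs://namenode:port/data/lookup.txt"), conf);</span>
+<br>
+<span class="codefrag">// 任务端:在configure(JobConf job)中取得本地拷贝</span>
+<br>
+<span class="codefrag">Path[] localFiles = DistributedCache.getLocalCacheFiles(job);</span>
+</p>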
+<p>
+	  用户可以通过<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#createSymlink(org.apache.hadoop.conf.Configuration)">
+          DistributedCache.createSymlink(Configuration)</a>方法让<span class="codefrag">DistributedCache</span>
+        在<em>当前工作目录</em>下创建到缓存文件的符号链接。
+	或者通过设置配置文件属性<span class="codefrag">mapred.create.symlink</span>为<span class="codefrag">yes</span>。
+	分布式缓存会截取URI的片段作为链接的名字。
+	例如,URI是 <span class="codefrag">hdfs://namenode:port/lib.so.1#lib.so</span>,
+	则在task当前工作目录会有名为<span class="codefrag">lib.so</span>的链接,
+        它会链接分布式缓存中的<span class="codefrag">lib.so.1</span>。
+        </p>
+<p>
+<span class="codefrag">DistributedCache</span>可在map/reduce任务中作为
+        一种基础软件分发机制使用。它可以被用于分发jar包和本地库(native libraries)。
+        <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addArchiveToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)">
+        DistributedCache.addArchiveToClassPath(Path, Configuration)</a>和
+        <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/filecache/DistributedCache.html#addFileToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)">
+        DistributedCache.addFileToClassPath(Path, Configuration)</a> API能够被用于
+        缓存文件和jar包,并把它们加入子jvm的<em>classpath</em>。也可以通过设置配置文档里的属性
+        <span class="codefrag">mapred.job.classpath.{files|archives}</span>达到相同的效果。缓存文件可用于分发和装载本地库。
+        </p>
+<a name="N10C50"></a><a name="Tool"></a>
+<h4>Tool</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/Tool.html">Tool</a> 
+          接口支持处理常用的Hadoop命令行选项。
+          </p>
+<p>
+<span class="codefrag">Tool</span> 是Map/Reduce工具或应用的标准。应用程序应只处理其定制参数,
+          要把标准命令行选项通过
+		<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/ToolRunner.html#run(org.apache.hadoop.util.Tool, java.lang.String[])"> ToolRunner.run(Tool, String[])</a> 
+		委托给
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/util/GenericOptionsParser.html">
+          GenericOptionsParser</a>处理。
+          </p>
+<p>
+            Hadoop命令行的常用选项有:<br>
+            
+<span class="codefrag">
+              -conf &lt;configuration file&gt;
+            </span>
+            
+<br>
+            
+<span class="codefrag">
+              -D &lt;property=value&gt;
+            </span>
+            
+<br>
+            
+<span class="codefrag">
+              -fs &lt;local|namenode:port&gt;
+            </span>
+            
+<br>
+            
+<span class="codefrag">
+              -jt &lt;local|jobtracker:port&gt;
+            </span>
+          
+</p>
+<a name="N10C81"></a><a name="IsolationRunner"></a>
+<h4>IsolationRunner</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/IsolationRunner.html">
+          IsolationRunner</a> 是帮助调试Map/Reduce程序的工具。</p>
+<p>使用<span class="codefrag">IsolationRunner</span>的方法是,首先设置
+          <span class="codefrag">keep.failed.tasks.files</span>属性为<span class="codefrag">true</span> 
+          (同时参考<span class="codefrag">keep.tasks.files.pattern</span>)。</p>
+<p>
+            然后,登录到任务运行失败的节点上,进入
+            <span class="codefrag">TaskTracker</span>的本地路径运行
+            <span class="codefrag">IsolationRunner</span>:<br>
+            
+<span class="codefrag">$ cd &lt;local path&gt;/taskTracker/${taskid}/work</span>
+<br>
+            
+<span class="codefrag">
+              $ bin/hadoop org.apache.hadoop.mapred.IsolationRunner ../job.xml
+            </span>
+          
+</p>
+<p>
+<span class="codefrag">IsolationRunner</span>会把失败的任务放在单独的一个能够调试的jvm上运行,并且采用和之前完全一样的输入数据。
+		</p>
+<a name="N10CB4"></a><a name="Profiling"></a>
+<h4>Profiling</h4>
+<p>Profiling是一个实用工具,它使用内置的java profiler,对若干(2-3个)有代表性的map或reduce任务样例进行分析,得到运行分析报告。</p>
+<p>用户可以通过设置属性<span class="codefrag">mapred.task.profile</span>指定系统是否采集profiler信息。
+          利用api<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setProfileEnabled(boolean)">
+          JobConf.setProfileEnabled(boolean)</a>可以修改该属性值。如果设为<span class="codefrag">true</span>,
+          则开启profiling功能。profiler信息保存在用户日志目录下。缺省情况下,profiling功能是关闭的。</p>
+<p>如果用户设定使用profiling功能,可以使用配置文档里的属性
+          <span class="codefrag">mapred.task.profile.{maps|reduces}</span>
+          设置要profile map/reduce task的范围。设置该属性值的api是
+           <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setProfileTaskRange(boolean,%20java.lang.String)">
+          JobConf.setProfileTaskRange(boolean,String)</a>。
+          范围的缺省值是<span class="codefrag">0-2</span>。</p>
+<p>用户可以通过设定配置文档里的属性<span class="codefrag">mapred.task.profile.params</span>
+          来指定profiler配置参数。修改属性要使用api
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setProfileParams(java.lang.String)">
+          JobConf.setProfileParams(String)</a>。当task运行时,如果该字符串包含<span class="codefrag">%s</span>,
+          它会被替换成profiling输出文件的文件名。这些参数会通过命令行传递到子JVM中。缺省的profiling
+          参数是
+          <span class="codefrag">-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s</span>。
+          </p>
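+<p>下面的示意片段把上述几个profiling相关的设置放在一起(仅为演示):</p>
+<p>
+<span class="codefrag">conf.setProfileEnabled(true);&nbsp;&nbsp;// 开启profiling</span>
+<br>
+<span class="codefrag">conf.setProfileTaskRange(true, "0-2");&nbsp;&nbsp;// 只profile编号0-2的map task</span>
+<br>
+<span class="codefrag">conf.setProfileTaskRange(false, "0-1");&nbsp;&nbsp;// 只profile编号0-1的reduce task</span>
+</p>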
+<a name="N10CE8"></a><a name="%E8%B0%83%E8%AF%95"></a>
+<h4>调试</h4>
+<p>Map/Reduce框架能够运行用户提供的用于调试的脚本程序。 
+          当map/reduce任务失败时,用户可以通过运行脚本在任务日志(例如任务的标准输出、标准错误、系统日志以及作业配置文件)上做后续处理工作。用户提供的调试脚本程序的标准输出和标准错误会输出为诊断文件。如果需要的话这些输出结果也可以打印在用户界面上。</p>
+<p> 在接下来的章节,我们讨论如何与作业一起提交调试脚本。为了提交调试脚本,
+          首先要把这个脚本分发出去,而且还要在配置文件里设置。
+     	  </p>
+<a name="N10CF4"></a><a name="%E5%A6%82%E4%BD%95%E5%88%86%E5%8F%91%E8%84%9A%E6%9C%AC%E6%96%87%E4%BB%B6%EF%BC%9A"></a>
+<h5> 如何分发脚本文件:</h5>
+<p>用户要用
+          <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
+          机制来<em>分发</em>和<em>链接</em>脚本文件</p>
+<a name="N10D08"></a><a name="%E5%A6%82%E4%BD%95%E6%8F%90%E4%BA%A4%E8%84%9A%E6%9C%AC%EF%BC%9A"></a>
+<h5> 如何提交脚本:</h5>
+<p> 一个快速提交调试脚本的方法是分别为需要调试的map任务和reduce任务设置
+		"mapred.map.task.debug.script" 和 "mapred.reduce.task.debug.script"
+	 属性的值。这些属性也可以通过
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapDebugScript(java.lang.String)">
+          JobConf.setMapDebugScript(String) </a>和 
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setReduceDebugScript(java.lang.String)">
+          JobConf.setReduceDebugScript(String) </a>API来设置。对于streaming,
+          可以分别为需要调试的map任务和reduce任务使用命令行选项-mapdebug 和 -reducedebug来提交调试脚本。
+          </p>
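+<p>下面的示意片段把分发与设置两步放在一起(其中脚本的HDFS路径仅为假设的例子):</p>
+<p>
+<span class="codefrag">DistributedCache.createSymlink(conf);&nbsp;&nbsp;// 在任务工作目录创建符号链接debug.sh</span>
+<br>
+<span class="codefrag">DistributedCache.addCacheFile(new URI("hdfs://namenode:port/scripts/debug.sh#debug.sh"), conf);</span>
+<br>
+<span class="codefrag">conf.setMapDebugScript("./debug.sh");&nbsp;&nbsp;// 等价于设置mapred.map.task.debug.script</span>
+</p>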
+<p>脚本的参数是任务的标准输出、标准错误、系统日志以及作业配置文件。在运行map/reduce失败的节点上运行调试命令是:
+		 <br>
+          
+<span class="codefrag"> $script $stdout $stderr $syslog $jobconf </span> 
+</p>
+<p> Pipes程序根据第五个参数获得C++程序名。
+          因此调试pipes程序的命令是<br> 
+          
+<span class="codefrag">$script $stdout $stderr $syslog $jobconf $program </span>  
+          
+</p>
+<a name="N10D2A"></a><a name="%E9%BB%98%E8%AE%A4%E8%A1%8C%E4%B8%BA"></a>
+<h5> 默认行为 </h5>
+<p> 对于pipes,默认的脚本会用gdb处理core dump,
+          打印 stack trace并且给出正在运行线程的信息。</p>
+<a name="N10D35"></a><a name="JobControl"></a>
+<h4>JobControl</h4>
+<p>
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/jobcontrol/package-summary.html">
+          JobControl</a>是一个工具,它封装了一组Map/Reduce作业以及它们之间的依赖关系。
+	  </p>
+<a name="N10D42"></a><a name="%E6%95%B0%E6%8D%AE%E5%8E%8B%E7%BC%A9"></a>
+<h4>数据压缩</h4>
+<p>Hadoop Map/Reduce框架为应用程序的写入文件操作提供压缩工具,这些工具可以为map输出的中间数据和作业最终输出数据(例如reduce的输出)提供支持。它还附带了一些
+          <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/compress/CompressionCodec.html">
+          CompressionCodec</a>的实现,比如实现了
+          <a href="http://www.zlib.net/">zlib</a>和<a href="http://www.oberhumer.com/opensource/lzo/">lzo</a>压缩算法。
+           Hadoop同样支持<a href="http://www.gzip.org/">gzip</a>文件格式。 
+          </p>
+<p>考虑到性能问题(zlib)以及Java类库的缺失(lzo)等因素,Hadoop也为上述压缩解压算法提供本地库的实现。更多的细节请参考
+          <a href="native_libraries.html">这里</a>。</p>
+<a name="N10D62"></a><a name="%E4%B8%AD%E9%97%B4%E8%BE%93%E5%87%BA"></a>
+<h5>中间输出</h5>
+<p>应用程序可以通过
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setCompressMapOutput(boolean)">
+            JobConf.setCompressMapOutput(boolean)</a> API控制是否对map输出的中间结果进行压缩,并且可以通过
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/JobConf.html#setMapOutputCompressorClass(java.lang.Class)">
+            JobConf.setMapOutputCompressorClass(Class)</a>api指定
+            <span class="codefrag">CompressionCodec</span>。
+        </p>
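+<p>下面是一个对map中间输出启用压缩的示意片段(这里以GzipCodec为例,仅作演示):</p>
+<p>
+<span class="codefrag">conf.setCompressMapOutput(true);</span>
+<br>
+<span class="codefrag">conf.setMapOutputCompressorClass(GzipCodec.class);&nbsp;&nbsp;// org.apache.hadoop.io.compress.GzipCodec</span>
+</p>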
+<a name="N10D77"></a><a name="%E4%BD%9C%E4%B8%9A%E8%BE%93%E5%87%BA"></a>
+<h5>作业输出</h5>
+<p>应用程序可以通过
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)">
+            FileOutputFormat.setCompressOutput(JobConf, boolean)</a>
+            api控制输出是否需要压缩并且可以使用 
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/FileOutputFormat.html#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)">
+            FileOutputFormat.setOutputCompressorClass(JobConf, Class)</a>api指定<span class="codefrag">CompressionCodec</span>。</p>
+<p>如果作业输出要保存成 
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/SequenceFileOutputFormat.html">
+            SequenceFileOutputFormat</a>格式,需要使用
+            <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/mapred/SequenceFileOutputFormat.html#setOutputCompressionType(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.io.SequenceFile.CompressionType)">
+            SequenceFileOutputFormat.setOutputCompressionType(JobConf, 
+            SequenceFile.CompressionType)</a>api,来设定
+            <span class="codefrag">SequenceFile.CompressionType</span> (i.e. <span class="codefrag">RECORD</span> / 
+            <span class="codefrag">BLOCK</span> - 默认是<span class="codefrag">RECORD</span>)。
+            </p>
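+<p>下面是一个对作业最终输出启用压缩的示意片段(同样以GzipCodec为例,仅作演示):</p>
+<p>
+<span class="codefrag">FileOutputFormat.setCompressOutput(conf, true);</span>
+<br>
+<span class="codefrag">FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);</span>
+<br>
+<span class="codefrag">// 若输出为SequenceFile,还可以指定压缩类型为BLOCK</span>
+<br>
+<span class="codefrag">SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);</span>
+</p>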
+</div>
+
+    
+<a name="N10DA6"></a><a name="%E4%BE%8B%E5%AD%90%EF%BC%9AWordCount+v2.0"></a>
+<h2 class="h3">例子:WordCount v2.0</h2>
+<div class="section">
+<p>这里是一个更全面的<span class="codefrag">WordCount</span>例子,它使用了我们已经讨论过的很多Map/Reduce框架提供的功能。 
+      </p>
+<p>运行这个例子需要HDFS的某些功能,特别是
+      <span class="codefrag">DistributedCache</span>相关功能。因此这个例子只能运行在
+      <a href="quickstart.html#SingleNodeSetup">伪分布式</a> 或者
+      <a href="quickstart.html#Fully-Distributed+Operation">完全分布式模式</a>的 
+      Hadoop上。</p>
+<a name="N10DC0"></a><a name="%E6%BA%90%E4%BB%A3%E7%A0%81-N10DC0"></a>
+<h3 class="h4">源代码</h3>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+          
+<tr>
+            
+<th colspan="1" rowspan="1"></th>
+            <th colspan="1" rowspan="1">WordCount.java</th>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">1.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">package org.myorg;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">2.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">3.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import java.io.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">4.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import java.util.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">5.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">6.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.fs.Path;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">7.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.filecache.DistributedCache;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">8.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.conf.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">9.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.io.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">10.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.mapred.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">11.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">import org.apache.hadoop.util.*;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">12.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">13.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">public class WordCount extends Configured implements Tool {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">14.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">15.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">
+                public static class Map extends MapReduceBase 
+                implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">16.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">17.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                static enum Counters { INPUT_WORDS }
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">18.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">19.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                private final static IntWritable one = new IntWritable(1);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">20.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private Text word = new Text();</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">21.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">22.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private boolean caseSensitive = true;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">23.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private Set&lt;String&gt; patternsToSkip = new HashSet&lt;String&gt;();</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">24.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">25.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private long numRecords = 0;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">26.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private String inputFile;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">27.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">28.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">public void configure(JobConf job) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">29.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">30.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">inputFile = job.get("map.input.file");</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">31.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">32.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">if (job.getBoolean("wordcount.skip.patterns", false)) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">33.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">Path[] patternsFiles = new Path[0];</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">34.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">try {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">35.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                patternsFiles = DistributedCache.getLocalCacheFiles(job);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">36.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">} catch (IOException ioe) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">37.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                System.err.println("Caught exception while getting cached files: " 
+                + StringUtils.stringifyException(ioe));
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">38.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">39.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">for (Path patternsFile : patternsFiles) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">40.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">parseSkipFile(patternsFile);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">41.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">42.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">43.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">44.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">45.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">private void parseSkipFile(Path patternsFile) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">46.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">try {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">47.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                BufferedReader fis = 
+                  new BufferedReader(new FileReader(patternsFile.toString()));
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">48.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">String pattern = null;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">49.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">while ((pattern = fis.readLine()) != null) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">50.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">patternsToSkip.add(pattern);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">51.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">52.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">} catch (IOException ioe) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">53.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                System.err.println("Caught exception while parsing the cached file '" +
+                                   patternsFile + "' : " + 
+                                   StringUtils.stringifyException(ioe));
+                
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">54.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">55.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">56.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">57.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                public void map(LongWritable key, Text value, 
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">58.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                String line = 
+                  (caseSensitive) ? value.toString() : 
+                                    value.toString().toLowerCase();
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">59.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">60.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">for (String pattern : patternsToSkip) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">61.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">line = line.replaceAll(pattern, "");</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">62.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">63.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">64.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">StringTokenizer tokenizer = new StringTokenizer(line);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">65.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">while (tokenizer.hasMoreTokens()) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">66.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">word.set(tokenizer.nextToken());</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">67.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">output.collect(word, one);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">68.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">reporter.incrCounter(Counters.INPUT_WORDS, 1);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">69.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">70.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">71.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">if ((++numRecords % 100) == 0) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">72.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                reporter.setStatus("Finished processing " + numRecords + 
+                                   " records " + "from the input file: " + 
+                                   inputFile);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">73.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">74.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">75.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">76.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">77.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">
+                public static class Reduce extends MapReduceBase implements 
+                Reducer&lt;Text, IntWritable, Text, IntWritable&gt; {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">78.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                public void reduce(Text key, Iterator&lt;IntWritable&gt; values,
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">79.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">int sum = 0;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">80.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">while (values.hasNext()) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">81.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">sum += values.next().get();</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">82.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">83.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">output.collect(key, new IntWritable(sum));</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">84.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">85.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">86.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">87.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">public int run(String[] args) throws Exception {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">88.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                JobConf conf = new JobConf(getConf(), WordCount.class);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">89.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setJobName("wordcount");</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">90.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">91.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setOutputKeyClass(Text.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">92.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setOutputValueClass(IntWritable.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">93.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">94.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setMapperClass(Map.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">95.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setCombinerClass(Reduce.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">96.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setReducerClass(Reduce.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">97.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">98.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setInputFormat(TextInputFormat.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">99.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">conf.setOutputFormat(TextOutputFormat.class);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">100.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">101.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                List&lt;String&gt; other_args = new ArrayList&lt;String&gt;();
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">102.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">for (int i=0; i &lt; args.length; ++i) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">103.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">if ("-skip".equals(args[i])) {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">104.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">105.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                conf.setBoolean("wordcount.skip.patterns", true);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">106.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">} else {</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">107.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">other_args.add(args[i]);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">108.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">109.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">110.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">111.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">112.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">113.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">114.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">JobClient.runJob(conf);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">115.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">return 0;</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">116.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">117.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">118.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">
+                public static void main(String[] args) throws Exception {
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">119.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">
+                int res = ToolRunner.run(new Configuration(), new WordCount(), 
+                                         args);
+              </span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">120.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <span class="codefrag">System.exit(res);</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">121.</td>
+            <td colspan="1" rowspan="1">
+              &nbsp;&nbsp;
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">122.</td>
+            <td colspan="1" rowspan="1">
+              <span class="codefrag">}</span>
+            </td>
+          
+</tr>
+          
+<tr>
+            
+<td colspan="1" rowspan="1">123.</td>
+            <td colspan="1" rowspan="1"></td>
+          
+</tr>
+        
+</table>
+<a name="N11522"></a><a name="%E8%BF%90%E8%A1%8C%E6%A0%B7%E4%BE%8B"></a>
+<h3 class="h4">运行样例</h3>
+<p>输入样例:</p>
+<p>
+          
+<span class="codefrag">$ bin/hadoop dfs -ls /usr/joe/wordcount/input/</span>
+<br>
+          
+<span class="codefrag">/usr/joe/wordcount/input/file01</span>
+<br>
+          
+<span class="codefrag">/usr/joe/wordcount/input/file02</span>
+<br>
+          
+<br>
+          
+<span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file01</span>
+<br>
+          
+<span class="codefrag">Hello World, Bye World!</span>
+<br>
+          
+<br>
+          
+<span class="codefrag">$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</span>
+<br>
+          
+<span class="codefrag">Hello Hadoop, Goodbye to hadoop.</span>
+        
+</p>
+<p>运行程序:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              /usr/joe/wordcount/input /usr/joe/wordcount/output 
+          </span>
+        
+</p>
+<p>输出:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </span>
+          
+<br>
+          
+<span class="codefrag">Bye    1</span>
+<br>
+          
+<span class="codefrag">Goodbye    1</span>
+<br>
+          
+<span class="codefrag">Hadoop,    1</span>
+<br>
+          
+<span class="codefrag">Hello    2</span>
+<br>
+          
+<span class="codefrag">World!    1</span>
+<br>
+          
+<span class="codefrag">World,    1</span>
+<br>
+          
+<span class="codefrag">hadoop.    1</span>
+<br>
+          
+<span class="codefrag">to    1</span>
+<br>
+        
+</p>
+<p>注意此时的输入与第一个版本不同,输出的结果也有所不同。
+	</p>
+<p>现在通过<span class="codefrag">DistributedCache</span>插入一个模式文件,文件中保存了要被忽略的单词模式。
+	</p>
+<p>
+          
+<span class="codefrag">$ bin/hadoop dfs -cat /user/joe/wordcount/patterns.txt</span>
+<br>
+          
+<span class="codefrag">\.</span>
+<br>
+          
+<span class="codefrag">\,</span>
+<br>
+          
+<span class="codefrag">\!</span>
+<br>
+          
+<span class="codefrag">to</span>
+<br>
+        
+</p>
+<p>再运行一次,这次使用更多的选项:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              -Dwordcount.case.sensitive=true /usr/joe/wordcount/input 
+              /usr/joe/wordcount/output -skip /user/joe/wordcount/patterns.txt
+          </span>
+        
+</p>
+<p>应该得到这样的输出:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </span>
+          
+<br>
+          
+<span class="codefrag">Bye    1</span>
+<br>
+          
+<span class="codefrag">Goodbye    1</span>
+<br>
+          
+<span class="codefrag">Hadoop    1</span>
+<br>
+          
+<span class="codefrag">Hello    2</span>
+<br>
+          
+<span class="codefrag">World    2</span>
+<br>
+          
+<span class="codefrag">hadoop    1</span>
+<br>
+        
+</p>
+<p>再运行一次,这一次关闭大小写敏感性(case-sensitivity):</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              -Dwordcount.case.sensitive=false /usr/joe/wordcount/input 
+              /usr/joe/wordcount/output -skip /user/joe/wordcount/patterns.txt
+          </span>
+        
+</p>
+<p>输出:</p>
+<p>
+          
+<span class="codefrag">
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </span>
+          
+<br>
+          
+<span class="codefrag">bye    1</span>
+<br>
+          
+<span class="codefrag">goodbye    1</span>
+<br>
+          
+<span class="codefrag">hadoop    2</span>
+<br>
+          
+<span class="codefrag">hello    2</span>
+<br>
+          
+<span class="codefrag">world    2</span>
+<br>
+        
+</p>
+<a name="N115F6"></a><a name="%E7%A8%8B%E5%BA%8F%E8%A6%81%E7%82%B9"></a>
+<h3 class="h4">程序要点</h3>
+<p>
+	通过使用一些Map/Reduce框架提供的功能,<span class="codefrag">WordCount</span>的第二个版本在原始版本基础上有了如下的改进:
+        </p>
+<ul>
+          
+<li>
+            展示了应用程序如何在<span class="codefrag">Mapper</span>(和<span class="codefrag">Reducer</span>)中通过<span class="codefrag">configure</span>方法
+		访问配置参数(28-43行,简化示意见本列表之后)。
+          </li>
+          
+<li>
+            展示了作业如何使用<span class="codefrag">DistributedCache</span> 来分发只读数据。
+	这里允许用户指定单词的模式,在计数时忽略那些符合模式的单词(104行)。
+          </li>
+          
+<li>
+        展示了<span class="codefrag">Tool</span>接口和<span class="codefrag">GenericOptionsParser</span>处理Hadoop命令行选项的功能
+            (87-116, 119行)。
+          </li>
+          
+<li>
+	展示了应用程序如何使用<span class="codefrag">Counters</span>(68行),如何通过传递给<span class="codefrag">map</span>(和<span class="codefrag">reduce</span>)
+	方法的<span class="codefrag">Reporter</span>实例来设置应用程序的状态信息(72行)。
+          </li>
+        
+</ul>
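+<p>作为对第一条要点的补充,下面给出第28-43行中<span class="codefrag">configure</span>方法大意的一个简化示意
+(并非教程正文,变量与辅助方法名请以完整教程代码为准):</p>
+<p>
+<span class="codefrag">
+  // 示意:configure在任务初始化时被调用,用于读取作业配置
+  public void configure(JobConf job) {
+    // 对应命令行中的 -Dwordcount.case.sensitive=...
+    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
+    inputFile = job.get("map.input.file");
+    // 若提交时指定了-skip,则从DistributedCache取回模式文件
+    if (job.getBoolean("wordcount.skip.patterns", false)) {
+      Path[] patternsFiles = new Path[0];
+      try {
+        patternsFiles = DistributedCache.getLocalCacheFiles(job);
+      } catch (IOException ioe) {
+        System.err.println("Caught exception while getting cached files: "
+                           + StringUtils.stringifyException(ioe));
+      }
+      for (Path patternsFile : patternsFiles) {
+        parseSkipFile(patternsFile); // 逐行读取模式并加入待忽略集合
+      }
+    }
+  }
+</span>
+</p>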
+</div>
+
+    
+<p>
+      
+<em>Java和JNI是Sun Microsystems, Inc.在美国和其它国家的注册商标。</em>
+    
+</p>
+    
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Changes suppressed because the diff is too large
+ 250 - 0
docs/cn/mapred_tutorial.pdf


+ 462 - 0
docs/cn/native_libraries.html

@@ -0,0 +1,462 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop本地库</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">快速入门</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Hadoop本地库</div>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="native_libraries.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop本地库</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%9B%AE%E7%9A%84">目的</a>
+</li>
+<li>
+<a href="#%E7%BB%84%E4%BB%B6">组件</a>
+</li>
+<li>
+<a href="#%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95">使用方法</a>
+</li>
+<li>
+<a href="#%E6%94%AF%E6%8C%81%E7%9A%84%E5%B9%B3%E5%8F%B0">支持的平台</a>
+</li>
+<li>
+<a href="#%E6%9E%84%E5%BB%BAHadoop%E6%9C%AC%E5%9C%B0%E5%BA%93">构建Hadoop本地库</a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%B3%A8%E6%84%8F">注意</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E4%BD%BF%E7%94%A8DistributedCache+%E5%8A%A0%E8%BD%BD%E6%9C%AC%E5%9C%B0%E5%BA%93"> 使用DistributedCache 加载本地库</a>
+</li>
+</ul>
+</div>
+  
+    
+<a name="N1000D"></a><a name="%E7%9B%AE%E7%9A%84"></a>
+<h2 class="h3">目的</h2>
+<div class="section">
+<p>
+     鉴于性能问题以及某些Java类库的缺失,对于某些组件,Hadoop提供了自己的本地实现。
+	这些组件保存在Hadoop的一个独立的动态链接库里。这个库在*nix平台上叫做<em>libhadoop.so</em>。本文主要介绍本地库的使用方法以及如何构建本地库。
+</p>
+</div>
+    
+    
+<a name="N1001A"></a><a name="%E7%BB%84%E4%BB%B6"></a>
+<h2 class="h3">组件</h2>
+<div class="section">
+<p>Hadoop目前为以下
+      <a href="http://hadoop.apache.org/core/docs/r0.18.2/api/org/apache/hadoop/io/compress/CompressionCodec.html">
+      压缩编解码器(compression codecs)</a>提供了本地组件:</p>
+<ul>
+        
+<li>
+<a href="http://www.zlib.net/">zlib</a>
+</li>
+        
+<li>
+<a href="http://www.gzip.org/">gzip</a>
+</li>
+        
+<li>
+<a href="http://www.oberhumer.com/opensource/lzo/">lzo</a>
+</li>
+      
+</ul>
+<p>在以上组件中,lzo和gzip压缩编解码器必须使用hadoop本地库才能运行。
+      </p>
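+<p>例如,若希望作业输出使用gzip压缩编解码器,可以在作业配置中这样设置
+(以下方法名以0.18系列的<span class="codefrag">FileOutputFormat</span> API为准,仅为示意):</p>
+<p>
+<span class="codefrag">FileOutputFormat.setCompressOutput(conf, true);</span>
+<br>
+<span class="codefrag">FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);</span>
+</p>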
+</div>
+
+    
+<a name="N1003D"></a><a name="%E4%BD%BF%E7%94%A8%E6%96%B9%E6%B3%95"></a>
+<h2 class="h3">使用方法</h2>
+<div class="section">
+<p>hadoop本地库的用法很简单:</p>
+<ul>
+        
+<li>
+          先查看
+	<a href="#%E6%94%AF%E6%8C%81%E7%9A%84%E5%B9%B3%E5%8F%B0">支持的平台</a>。
+        </li>
+        
+<li>
+           
+<a href="http://hadoop.apache.org/core/releases.html#Download">下载</a> 预构建的32位i386架构的Linux本地hadoop库(可以在hadoop发行版的<span class="codefrag">lib/native</span>目录下找到)或者自己
+          <a href="#%E6%9E%84%E5%BB%BAHadoop%E6%9C%AC%E5%9C%B0%E5%BA%93">构建</a> 这些库。
+        </li>
+        
+<li>
+          确保你的平台已经安装了<strong>zlib-1.2</strong>以上版本或者<strong>lzo2.0</strong>以上版本的软件包或者两者均已安装(根据你的需要)。
+        </li>
+      
+</ul>
+<p>
+<span class="codefrag">bin/hadoop</span> 脚本通过系统属性
+      <em>-Djava.library.path=&lt;path&gt;</em>来确认hadoop本地库是否包含在库路径里。</p>
+<p>检查hadoop日志文件可以查看hadoop库是否正常,正常情况下会看到:</p>
+<p>
+        
+<span class="codefrag">
+          DEBUG util.NativeCodeLoader - Trying to load the custom-built 
+          native-hadoop library... 
+        </span>
+<br>
+        
+<span class="codefrag">
+          INFO  util.NativeCodeLoader - Loaded the native-hadoop library
+        </span>
+      
+</p>
+<p>如果出错,会看到:</p>
+<p>
+        
+<span class="codefrag">
+          INFO util.NativeCodeLoader - Unable to load native-hadoop library for 
+          your platform... using builtin-java classes where applicable
+        </span>
+      
+</p>
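+<p>例如,可以在启动守护进程之后,在日志目录中检索上述信息加以确认
+(日志路径为默认值,请以实际配置为准):</p>
+<p>
+<span class="codefrag">$ grep NativeCodeLoader logs/*.log</span>
+</p>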
+</div>
+    
+    
+<a name="N10086"></a><a name="%E6%94%AF%E6%8C%81%E7%9A%84%E5%B9%B3%E5%8F%B0"></a>
+<h2 class="h3">支持的平台</h2>
+<div class="section">
+<p>Hadoop本地库只支持*nix平台,已经广泛使用在GNU/Linux平台上,但是不支持
+      <a href="http://www.cygwin.com/">Cygwin</a> 
+      和 <a href="http://www.apple.com/macosx">Mac OS X</a>。 
+      </p>
+<p>已经测试过的GNU/Linux发行版本:</p>
+<ul>
+        
+<li>
+          
+<a href="http://www.redhat.com/rhel/">RHEL4</a>/<a href="http://fedora.redhat.com/">Fedora</a>
+        
+</li>
+        
+<li>
+<a href="http://www.ubuntu.com/">Ubuntu</a>
+</li>
+        
+<li>
+<a href="http://www.gentoo.org/">Gentoo</a>
+</li>
+      
+</ul>
+<p>在上述平台上,32/64位Hadoop本地库分别能和32/64位的jvm一起正常运行。
+      </p>
+</div>
+    
+    
+<a name="N100B6"></a><a name="%E6%9E%84%E5%BB%BAHadoop%E6%9C%AC%E5%9C%B0%E5%BA%93"></a>
+<h2 class="h3">构建Hadoop本地库</h2>
+<div class="section">
+<p>Hadoop本地库使用
+      <a href="http://en.wikipedia.org/wiki/ANSI_C">ANSI C</a> 编写,使用GNU autotools工具链 (autoconf, autoheader, automake, autoscan, libtool)构建。也就是说构建hadoop库的平台需要有标准C的编译器和GNU autotools工具链。请参看
+      <a href="#%E6%94%AF%E6%8C%81%E7%9A%84%E5%B9%B3%E5%8F%B0">支持的平台</a>。</p>
+<p>你的目标平台上可能会需要的软件包:
+      </p>
+<ul>
+        
+<li>
+          C 编译器 (e.g. <a href="http://gcc.gnu.org/">GNU C Compiler</a>)
+        </li>
+        
+<li>
+          GNU Autotools 工具链: 
+          <a href="http://www.gnu.org/software/autoconf/">autoconf</a>, 
+          <a href="http://www.gnu.org/software/automake/">automake</a>, 
+          <a href="http://www.gnu.org/software/libtool/">libtool</a>
+        
+</li>
+        
+<li> 
+          zlib开发包 (stable version &gt;= 1.2.0)
+        </li>
+        
+<li> 
+          lzo开发包 (stable version &gt;= 2.0)
+        </li> 
+      
+</ul>
+<p>如果已经满足了上述先决条件,就可以使用顶层的<span class="codefrag">build.xml</span> 
+      构建脚本,并在命令行上将<span class="codefrag">compile.native</span>属性置为 
+      <span class="codefrag">true</span>,这样就可以生成hadoop本地库:</p>
+<p>
+<span class="codefrag">$ ant -Dcompile.native=true &lt;target&gt;</span>
+</p>
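+<p>例如,假设目标名沿用源码树中常见的<span class="codefrag">compile-core-native</span>
+(具体目标名请以你本地的<span class="codefrag">build.xml</span>为准):</p>
+<p>
+<span class="codefrag">$ ant -Dcompile.native=true compile-core-native</span>
+</p>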
+<p>因为不是所有用户都需要Hadoop本地库,所以默认情况下hadoop不生成该库。
+      </p>
+<p>你可以在下面的路径查看新生成的hadoop本地库:</p>
+<p>
+<span class="codefrag">$ build/native/&lt;platform&gt;/lib</span>
+</p>
+<p>其中&lt;platform&gt;是下列系统属性的组合 
+      <span class="codefrag">${os.name}-${os.arch}-${sun.arch.data.model}</span>;例如 
+      Linux-i386-32。</p>
+<a name="N10109"></a><a name="%E6%B3%A8%E6%84%8F"></a>
+<h3 class="h4">注意</h3>
+<ul>
+          
+<li>
+            在生成hadoop本地库的目标平台上<strong>必须</strong>安装zlib和lzo的开发包;但如果你只希望使用其中一种编解码器,部署时只安装对应的那个软件包即可。
+          </li>
+          
+<li>
+		  在目标平台上生成以及部署hadoop本地库时,都需要根据32/64位jvm选取对应的32/64位zlib/lzo软件包。
+          </li>
+        
+</ul>
+</div>
+    
+<a name="N1011F"></a><a name="%E4%BD%BF%E7%94%A8DistributedCache+%E5%8A%A0%E8%BD%BD%E6%9C%AC%E5%9C%B0%E5%BA%93"></a>
+<h2 class="h3"> 使用DistributedCache 加载本地库</h2>
+<div class="section">
+<p>用户可以通过
+      <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
+      加载本地共享库,并<em>分发</em>和建立库文件的<em>符号链接</em>。
+      </p>
+<p>这个例子描述了如何分发库文件,并在map/reduce任务中装载它(合并后的示意代码见下面的步骤列表之后)。
+      </p>
+<ol>
+      
+<li>首先拷贝库文件到HDFS。<br>
+      
+<span class="codefrag">bin/hadoop fs -copyFromLocal mylib.so.1 /libraries/mylib.so.1</span>
+      
+</li>
+      
+<li>启动作业时包含以下代码:<br>
+      
+<span class="codefrag"> DistributedCache.createSymlink(conf); </span> 
+<br>
+      
+<span class="codefrag"> DistributedCache.addCacheFile(new URI("hdfs://host:port/libraries/mylib.so.1#mylib.so"), conf);
+      </span>
+      
+</li>
+      
+<li>map/reduce任务中包含以下代码:<br>
+      
+<span class="codefrag"> System.loadLibrary("mylib.so"); </span>
+      
+</li>
+      
+</ol>
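+<p>把上述三步合起来,大致写法如下(示意代码,类名<span class="codefrag">MyJob</span>仅为占位;
+注意<span class="codefrag">addCacheFile</span>接受的是<span class="codefrag">java.net.URI</span>):</p>
+<p>
+<span class="codefrag">
+  // 提交端:分发库文件,并以mylib.so为名建立符号链接
+  JobConf conf = new JobConf(MyJob.class);
+  DistributedCache.createSymlink(conf);
+  DistributedCache.addCacheFile(
+      new URI("hdfs://host:port/libraries/mylib.so.1#mylib.so"), conf);
+  // 任务端(例如在Mapper的configure方法中):按符号链接名装载
+  System.loadLibrary("mylib.so");
+</span>
+</p>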
+</div>
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Changes suppressed because the diff is too large
+ 107 - 0
docs/cn/native_libraries.pdf


+ 574 - 0
docs/cn/quickstart.html

@@ -0,0 +1,574 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title>Hadoop快速入门</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">项目</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">维基</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.18文档</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">文档</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">概述</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">快速入门</div>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">集群搭建</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS构架设计</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS使用指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS权限指南</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS配额管理指南</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">命令手册</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell使用指南</a>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp使用指南</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce教程</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Hadoop本地库</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/api/index.html">API参考</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html">API Changes</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">维基</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">常见问题</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">邮件列表</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html">发行说明</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html">变更日志</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="quickstart.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop快速入门</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#%E7%9B%AE%E7%9A%84">目的</a>
+</li>
+<li>
+<a href="#PreReqs">先决条件</a>
+<ul class="minitoc">
+<li>
+<a href="#%E6%94%AF%E6%8C%81%E5%B9%B3%E5%8F%B0">支持平台</a>
+</li>
+<li>
+<a href="#%E6%89%80%E9%9C%80%E8%BD%AF%E4%BB%B6">所需软件</a>
+</li>
+<li>
+<a href="#%E5%AE%89%E8%A3%85%E8%BD%AF%E4%BB%B6">安装软件</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#%E4%B8%8B%E8%BD%BD">下载</a>
+</li>
+<li>
+<a href="#%E8%BF%90%E8%A1%8CHadoop%E9%9B%86%E7%BE%A4%E7%9A%84%E5%87%86%E5%A4%87%E5%B7%A5%E4%BD%9C">运行Hadoop集群的准备工作</a>
+</li>
+<li>
+<a href="#Local">单机模式的操作方法</a>
+</li>
+<li>
+<a href="#PseudoDistributed">伪分布式模式的操作方法</a>
+<ul class="minitoc">
+<li>
+<a href="#%E9%85%8D%E7%BD%AE">配置</a>
+</li>
+<li>
+<a href="#%E5%85%8D%E5%AF%86%E7%A0%81">免密码ssh设置</a>
+</li>
+<li>
+<a href="#%E6%89%A7%E8%A1%8C">执行</a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#FullyDistributed">完全分布式模式的操作方法</a>
+</li>
+</ul>
+</div>
+  
+    
+<a name="N1000D"></a><a name="%E7%9B%AE%E7%9A%84"></a>
+<h2 class="h3">目的</h2>
+<div class="section">
+<p>这篇文档的目的是帮助你快速完成单机上的Hadoop安装与使用,以便你对<a href="hdfs_design.html">Hadoop分布式文件系统(<acronym title="Hadoop Distributed File System">HDFS</acronym>)</a>和Map-Reduce框架有所体会,比如在HDFS上运行示例程序或简单作业等。</p>
+</div>
+    
+<a name="N1001F"></a><a name="PreReqs"></a>
+<h2 class="h3">先决条件</h2>
+<div class="section">
+<a name="N10025"></a><a name="%E6%94%AF%E6%8C%81%E5%B9%B3%E5%8F%B0"></a>
+<h3 class="h4">支持平台</h3>
+<ul>
+          
+<li>
+                GNU/Linux是产品开发和运行的平台。
+	        Hadoop已在有2000个节点的GNU/Linux主机组成的集群系统上得到验证。
+          </li>
+          
+<li>
+            Win32平台是作为<em>开发平台</em>支持的。由于分布式操作尚未在Win32平台上充分测试,所以还不作为一个<em>生产平台</em>被支持。
+          </li>
+        
+</ul>
+<a name="N1003B"></a><a name="%E6%89%80%E9%9C%80%E8%BD%AF%E4%BB%B6"></a>
+<h3 class="h4">所需软件</h3>
+<p>Linux和Windows所需软件包括:</p>
+<ol>
+          
+<li>
+            Java<sup>TM</sup>1.5.x,必须安装,建议选择Sun公司发行的Java版本。
+          </li>
+          
+<li>
+            
+<strong>ssh</strong> 必须安装并且保证 <strong>sshd</strong>一直运行,以便用Hadoop
+	    脚本管理远端Hadoop守护进程。
+          </li>
+        
+</ol>
+<p>Windows下的附加软件需求</p>
+<ol>
+            
+<li>
+              
+<a href="http://www.cygwin.com/">Cygwin</a> - 提供上述软件之外的shell支持。 
+            </li>
+          
+</ol>
+<a name="N10064"></a><a name="%E5%AE%89%E8%A3%85%E8%BD%AF%E4%BB%B6"></a>
+<h3 class="h4">安装软件</h3>
+<p>如果你的集群尚未安装所需软件,你得首先安装它们。</p>
+<p>以Ubuntu Linux为例:</p>
+<p>
+          
+<span class="codefrag">$ sudo apt-get install ssh</span>
+<br>
+          
+<span class="codefrag">$ sudo apt-get install rsync</span>
+        
+</p>
+<p>在Windows平台上,如果安装cygwin时未安装全部所需软件,则需启动cygwin安装管理器安装如下软件包:</p>
+<ul>
+          
+<li>openssh - <em>Net</em> 类</li>
+        
+</ul>
+</div>
+    
+    
+<a name="N10088"></a><a name="%E4%B8%8B%E8%BD%BD"></a>
+<h2 class="h3">下载</h2>
+<div class="section">
+<p>
+        为了获取Hadoop的发行版,从Apache的某个镜像服务器上下载最近的
+        <a href="http://hadoop.apache.org/core/releases.html">稳定发行版</a>。</p>
+</div>
+
+    
+<a name="N10096"></a><a name="%E8%BF%90%E8%A1%8CHadoop%E9%9B%86%E7%BE%A4%E7%9A%84%E5%87%86%E5%A4%87%E5%B7%A5%E4%BD%9C"></a>
+<h2 class="h3">运行Hadoop集群的准备工作</h2>
+<div class="section">
+<p>
+        解压所下载的Hadoop发行版。编辑
+        <span class="codefrag">conf/hadoop-env.sh</span>文件,至少需要将<span class="codefrag">JAVA_HOME</span>设置为Java安装根路径。
+      </p>
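+<p>例如,在<span class="codefrag">conf/hadoop-env.sh</span>中加入如下一行
+(路径仅为示例,请以你的Java安装位置为准):</p>
+<p>
+<span class="codefrag">export JAVA_HOME=/usr/java/jdk1.5.0</span>
+</p>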
+<p>
+	    尝试如下命令:<br>
+        
+<span class="codefrag">$ bin/hadoop</span>
+<br>
+        将会显示<strong>hadoop</strong> 脚本的使用文档。
+      </p>
+<p>现在你可以用以下三种支持的模式中的一种启动Hadoop集群:
+      </p>
+<ul>
+        
+<li>单机模式</li>
+        
+<li>伪分布式模式</li>
+        
+<li>完全分布式模式</li>
+      
+</ul>
+</div>
+    
+    
+<a name="N100C1"></a><a name="Local"></a>
+<h2 class="h3">单机模式的操作方法</h2>
+<div class="section">
+<p>默认情况下,Hadoop被配置成以非分布式模式运行的一个独立Java进程。这对调试非常有帮助。</p>
+<p>
+        下面的实例将已解压的 <span class="codefrag">conf</span> 目录拷贝作为输入,查找并显示匹配给定正则表达式的条目。输出写入到指定的<span class="codefrag">output</span>目录。
+        <br>
+        
+<span class="codefrag">$ mkdir input</span>
+<br>
+        
+<span class="codefrag">$ cp conf/*.xml input</span>
+<br>
+        
+<span class="codefrag">
+          $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+        </span>
+<br>
+        
+<span class="codefrag">$ cat output/*</span>
+      
+</p>
+</div>
+    
+    
+<a name="N100E5"></a><a name="PseudoDistributed"></a>
+<h2 class="h3">伪分布式模式的操作方法</h2>
+<div class="section">
+<p>Hadoop可以在单节点上以所谓的伪分布式模式运行,此时每一个Hadoop守护进程都作为一个独立的Java进程运行。</p>
+<a name="N100EE"></a><a name="%E9%85%8D%E7%BD%AE"></a>
+<h3 class="h4">配置</h3>
+<p>使用如下的 <span class="codefrag">conf/hadoop-site.xml</span>:</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+        
+<tr>
+<td colspan="1" rowspan="1">&lt;configuration&gt;</td>
+</tr>
+
+          
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;property&gt;</td>
+</tr>
+            
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;fs.default.name&lt;/name&gt;</td>
+</tr>
+            
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9000&lt;/value&gt;</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;/property&gt;</td>
+</tr>
+
+          
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;property&gt;</td>
+</tr>
+            
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;mapred.job.tracker&lt;/name&gt;</td>
+</tr>
+            
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9001&lt;/value&gt;</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;/property&gt;</td>
+</tr>
+
+          
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;property&gt;</td>
+</tr>
+            
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;dfs.replication&lt;/name&gt;</td>
+</tr>
+            
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</td>
+</tr>
+          
+<tr>
+<td colspan="1" rowspan="1">&nbsp;&nbsp;&lt;/property&gt;</td>
+</tr>
+
+        
+<tr>
+<td colspan="1" rowspan="1">&lt;/configuration&gt;</td>
+</tr>
+        
+</table>
+<a name="N10152"></a><a name="%E5%85%8D%E5%AF%86%E7%A0%81"></a>
+<h3 class="h4">免密码ssh设置</h3>
+<p>
+          现在确认能否不输入口令就用ssh登录localhost:<br>
+          
+<span class="codefrag">$ ssh localhost</span>
+        
+</p>
+<p>
+          如果不输入口令就无法用ssh登录localhost,执行下面的命令:<br>
+   		  
+<span class="codefrag">$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa</span>
+<br>
+		  
+<span class="codefrag">$ cat ~/.ssh/id_dsa.pub &gt;&gt; ~/.ssh/authorized_keys</span>
+		
+</p>
+<a name="N10170"></a><a name="%E6%89%A7%E8%A1%8C"></a>
+<h3 class="h4">执行</h3>
+<p>
+          格式化一个新的分布式文件系统:<br>
+          
+<span class="codefrag">$ bin/hadoop namenode -format</span>
+        
+</p>
+<p>
+		  启动Hadoop守护进程:<br>
+          
+<span class="codefrag">$ bin/start-all.sh</span>
+        
+</p>
+<p>Hadoop守护进程的日志写入到 
+        <span class="codefrag">${HADOOP_LOG_DIR}</span> 目录 (默认是 
+        <span class="codefrag">${HADOOP_HOME}/logs</span>)。</p>
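+<p>各守护进程的日志文件名形如
+<span class="codefrag">hadoop-&lt;user&gt;-&lt;daemon&gt;-&lt;hostname&gt;.log</span>,
+例如(用户名与主机名仅为示例):</p>
+<p>
+<span class="codefrag">$ tail logs/hadoop-joe-namenode-localhost.log</span>
+</p>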
+<p>浏览NameNode和JobTracker的网络接口,它们的地址默认为:</p>
+<ul>
+          
+<li>
+            
+<span class="codefrag">NameNode</span> - 
+            <a href="http://localhost:50070/">http://localhost:50070/</a>
+          
+</li>
+          
+<li>
+            
+<span class="codefrag">JobTracker</span> - 
+            <a href="http://localhost:50030/">http://localhost:50030/</a>
+          
+</li>
+        
+</ul>
+<p>
+          将输入文件拷贝到分布式文件系统:<br>
+		  
+<span class="codefrag">$ bin/hadoop fs -put conf input</span>
+		
+</p>
+<p>
+          运行发行版提供的示例程序:<br>
+          
+<span class="codefrag">
+            $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+          </span>
+        
+</p>
+<p>查看输出文件:</p>
+<p>
+          将输出文件从分布式文件系统拷贝到本地文件系统查看:<br>
+          
+<span class="codefrag">$ bin/hadoop fs -get output output</span>
+<br>
+          
+<span class="codefrag">$ cat output/*</span>
+        
+</p>
+<p> 或者 </p>
+<p>
+          在分布式文件系统上查看输出文件:<br>
+          
+<span class="codefrag">$ bin/hadoop fs -cat output/*</span>
+        
+</p>
+<p>
+		  完成全部操作后,停止守护进程:<br>
+		  
+<span class="codefrag">$ bin/stop-all.sh</span>
+		
+</p>
+</div>
+    
+    
+<a name="N101DD"></a><a name="FullyDistributed"></a>
+<h2 class="h3">完全分布式模式的操作方法</h2>
+<div class="section">
+<p>关于搭建完全分布式模式、具有实际意义的集群的资料,可以在<a href="cluster_setup.html">这里</a>找到。</p>
+</div>
+    
+    
+<p>
+	    
+<em>Java与JNI是Sun Microsystems, Inc.在美国以及其他国家地区的商标或注册商标。</em>
+    
+</p>
+    
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Changes suppressed because the diff is too large
+ 173 - 0
docs/cn/quickstart.pdf


+ 23 - 0
docs/cn/skin/CommonMessages_de.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<catalogue>
+  <message key="Font size:">Schriftgrösse:</message>
+  <message key="Last Published:">Zuletzt veröffentlicht:</message>
+  <message key="Search">Suche:</message>
+  <message key="Search the site with">Suche auf der Seite mit</message>
+</catalogue>

+ 23 - 0
docs/cn/skin/CommonMessages_en_US.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<catalogue>
+  <message key="Font size:">Font size:</message>
+  <message key="Last Published:">Last Published:</message>
+  <message key="Search">Search</message>
+  <message key="Search the site with">Search site with</message>
+</catalogue>

+ 23 - 0
docs/cn/skin/CommonMessages_es.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<catalogue>
+  <message key="Font size:">Tamaño del texto:</message>
+  <message key="Last Published:">Fecha de publicación:</message>
+  <message key="Search">Buscar</message>
+  <message key="Search the site with">Buscar en</message>
+</catalogue>

+ 23 - 0
docs/cn/skin/CommonMessages_fr.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<catalogue>
+  <message key="Font size:">Taille :</message>
+  <message key="Last Published:">Dernière publication :</message>
+  <message key="Search">Rechercher</message>
+  <message key="Search the site with">Rechercher sur le site avec</message>
+</catalogue>

+ 166 - 0
docs/cn/skin/basic.css

@@ -0,0 +1,166 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * General
+ */
+
+img { border: 0; }
+
+#content table {
+  border: 0;
+  width: 100%;
+}
+/*Hack to get IE to render the table at 100%*/
+* html #content table { margin-left: -3px; }
+
+#content th,
+#content td {
+  margin: 0;
+  padding: 0;
+  vertical-align: top;
+}
+
+.clearboth {
+  clear: both;
+}
+
+.note, .warning, .fixme {
+  border: solid black 1px;
+  margin: 1em 3em;
+}
+
+.note .label {
+  background: #369;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.note .content {
+  background: #F0F0FF;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.warning .label {
+  background: #C00;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.warning .content {
+  background: #FFF0F0;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.fixme .label {
+  background: #C6C600;
+  color: black;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.fixme .content {
+  padding: 5px 10px;
+}
+
+/**
+ * Typography
+ */
+
+body {
+  font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
+  font-size: 100%;
+}
+
+#content {
+  font-family: Georgia, Palatino, Times, serif;
+  font-size: 95%;
+}
+#tabs {
+  font-size: 70%;
+}
+#menu {
+  font-size: 80%;
+}
+#footer {
+  font-size: 70%;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
+  font-weight: bold;
+  margin-top: 1em;
+  margin-bottom: .5em;
+}
+
+h1 {
+    margin-top: 0;
+    margin-bottom: 1em;
+  font-size: 1.4em;
+}
+#content h1 {
+  font-size: 160%;
+  margin-bottom: .5em;
+}
+#menu h1 {
+  margin: 0;
+  padding: 10px;
+  background: #336699;
+  color: white;
+}
+h2 { font-size: 120%; }
+h3 { font-size: 100%; }
+h4 { font-size: 90%; }
+h5 { font-size: 80%; }
+h6 { font-size: 75%; }
+
+p {
+  line-height: 120%;
+  text-align: left;
+  margin-top: .5em;
+  margin-bottom: 1em;
+}
+
+#content li,
+#content th,
+#content td,
+#content li ul,
+#content li ol{
+  margin-top: .5em;
+  margin-bottom: .5em;
+}
+
+
+#content li li,
+#minitoc-area li{
+  margin-top: 0em;
+  margin-bottom: 0em;
+}
+
+#content .attribution {
+  text-align: right;
+  font-style: italic;
+  font-size: 85%;
+  margin-top: 1em;
+}
+
+.codefrag {
+  font-family: "Courier New", Courier, monospace;
+  font-size: 110%;
+}

+ 90 - 0
docs/cn/skin/breadcrumbs-optimized.js

@@ -0,0 +1,90 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+var PREPREND_CRUMBS=new Array();
+var link1="@skinconfig.trail.link1.name@";
+var link2="@skinconfig.trail.link2.name@";
+var link3="@skinconfig.trail.link3.name@";
+// quote the skinconfig href tokens so the substituted values remain string literals
+if(!(link1=="")&&!link1.indexOf( "@" ) == 0){
+  PREPREND_CRUMBS.push( new Array( link1, "@skinconfig.trail.link1.href@" ) ); }
+if(!(link2=="")&&!link2.indexOf( "@" ) == 0){
+  PREPREND_CRUMBS.push( new Array( link2, "@skinconfig.trail.link2.href@" ) ); }
+if(!(link3=="")&&!link3.indexOf( "@" ) == 0){
+  PREPREND_CRUMBS.push( new Array( link3, "@skinconfig.trail.link3.href@" ) ); }
+var DISPLAY_SEPARATOR=" &gt; ";
+var DISPLAY_PREPREND=" &gt; ";
+var DISPLAY_POSTPREND=":";
+var CSS_CLASS_CRUMB="breadcrumb";
+var CSS_CLASS_TRAIL="breadcrumbTrail";
+var CSS_CLASS_SEPARATOR="crumbSeparator";
+var FILE_EXTENSIONS=new Array( ".html", ".htm", ".jsp", ".php", ".php3", ".php4" );
+var PATH_SEPARATOR="/";
+
+function sc(s) {
+	var l=s.toLowerCase();
+	return l.substr(0,1).toUpperCase()+l.substr(1);
+}
+function getdirs() {
+	var t=document.location.pathname.split(PATH_SEPARATOR);
+	var lc=t[t.length-1];
+	// only drop the last path component when it really is a file name;
+	// indexOf returns -1 (truthy) when the extension is absent
+	for(var i=0;i < FILE_EXTENSIONS.length;i++)
+	{
+		if(lc.indexOf(FILE_EXTENSIONS[i]) != -1)
+			return t.slice(1,t.length-1); }
+	return t.slice(1,t.length);
+}
+function getcrumbs( d )
+{
+	var pre = "/";
+	var post = "/";
+	var c = new Array();
+	if( d != null )
+	{
+		for(var i=0;i < d.length;i++) {
+			// append the separator held in "post" ("postfix" was undefined)
+			pre+=d[i]+post;
+			c.push(new Array(d[i],pre)); }
+	}
+	if(PREPREND_CRUMBS.length > 0 )
+		return PREPREND_CRUMBS.concat( c );
+	return c;
+}
+function gettrail( c )
+{
+	var h=DISPLAY_PREPREND;
+	for(var i=0;i < c.length;i++)
+	{
+		h+='<a href="'+c[i][1]+'" >'+sc(c[i][0])+'</a>';
+		if(i!=(c.length-1))
+			h+=DISPLAY_SEPARATOR; }
+	return h+DISPLAY_POSTPREND;
+}
+
+function gettrailXHTML( c )
+{
+	var h='<span class="'+CSS_CLASS_TRAIL+'">'+DISPLAY_PREPREND;
+	for(var i=0;i < c.length;i++)
+	{
+		h+='<a href="'+c[i][1]+'" class="'+CSS_CLASS_CRUMB+'">'+sc(c[i][0])+'</a>';
+		if(i!=(c.length-1))
+			h+='<span class="'+CSS_CLASS_SEPARATOR+'">'+DISPLAY_SEPARATOR+'</span>'; }
+	return h+DISPLAY_POSTPREND+'</span>';
+}
+
+if(document.location.href.toLowerCase().indexOf("http://")==-1)
+	document.write(gettrail(getcrumbs()));
+else
+	document.write(gettrail(getcrumbs(getdirs())));
+
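For orientation, a minimal sketch of what this script computes for a hypothetical page; the path and rendered output below are illustrative, not part of the commit:

    // Assume document.location.pathname == "/docs/cn/skin/page.html".
    // getdirs() drops the filename and yields ["docs", "cn", "skin"];
    // getcrumbs() accumulates the parent paths:
    //   [["docs","/docs/"], ["cn","/docs/cn/"], ["skin","/docs/cn/skin/"]]
    // and gettrail() renders each pair as a capitalized link, e.g.
    //   ' &gt; <a href="/docs/" >Docs</a> &gt; <a href="/docs/cn/" >Cn</a> &gt; <a href="/docs/cn/skin/" >Skin</a>:'
    document.write(gettrail(getcrumbs(getdirs())));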

+ 237 - 0
docs/cn/skin/breadcrumbs.js

@@ -0,0 +1,237 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * This script, when included in an HTML file, builds a neat breadcrumb trail
+ * based on its URL. That is, if it doesn't contain bugs (I'm relatively
+ * sure it does).
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="breadcrumbs.js"></script>
+ */
+
+/**
+ * IE 5 on Mac doesn't know Array.push.
+ *
+ * Implement it - courtesy to fritz.
+ */
+var abc	= new Array();
+if (!abc.push) {
+  Array.prototype.push	= function(what){this[this.length]=what}
+}
+
+/* ========================================================================
+	CONSTANTS
+   ======================================================================== */
+
+/**
+ * Two-dimensional array containing extra crumbs to place at the front of
+ * the trail. Specify first the name of the crumb, then the URI that belongs
+ * to it. You'll need to modify this for every domain or subdomain where
+ * you use this script (you can leave it as an empty array if you wish)
+ */
+var PREPREND_CRUMBS = new Array();
+
+var link1 = "@skinconfig.trail.link1.name@";
+var link2 = "@skinconfig.trail.link2.name@";
+var link3 = "@skinconfig.trail.link3.name@";
+
+var href1 = "@skinconfig.trail.link1.href@";
+var href2 = "@skinconfig.trail.link2.href@";
+var href3 = "@skinconfig.trail.link3.href@";
+
+   if(link1 != "" && link1.indexOf( "@" ) != 0){
+     PREPREND_CRUMBS.push( new Array( link1, href1 ) );
+   }
+   if(link2 != "" && link2.indexOf( "@" ) != 0){
+     PREPREND_CRUMBS.push( new Array( link2, href2 ) );
+   }
+   if(link3 != "" && link3.indexOf( "@" ) != 0){
+     PREPREND_CRUMBS.push( new Array( link3, href3 ) );
+   }
+
+/**
+ * String to include between crumbs:
+ */
+var DISPLAY_SEPARATOR = " &gt; ";
+/**
+ * String to include at the beginning of the trail
+ */
+var DISPLAY_PREPREND = " &gt; ";
+/**
+ * String to include at the end of the trail
+ */
+var DISPLAY_POSTPREND = "";
+
+/**
+ * CSS Class to use for a single crumb:
+ */
+var CSS_CLASS_CRUMB = "breadcrumb";
+
+/**
+ * CSS Class to use for the complete trail:
+ */
+var CSS_CLASS_TRAIL = "breadcrumbTrail";
+
+/**
+ * CSS Class to use for crumb separator:
+ */
+var CSS_CLASS_SEPARATOR = "crumbSeparator";
+
+/**
+ * Array of strings containing common file extensions. We use this to
+ * determine what part of the url to ignore (if it contains one of the
+ * string specified here, we ignore it).
+ */
+var FILE_EXTENSIONS = new Array( ".html", ".htm", ".jsp", ".php", ".php3", ".php4" );
+
+/**
+ * String that separates parts of the breadcrumb trail from each other.
+ * When this is no longer a slash, I'm sure I'll be old and grey.
+ */
+var PATH_SEPARATOR = "/";
+
+/* ========================================================================
+	UTILITY FUNCTIONS
+   ======================================================================== */
+/**
+ * Capitalize the first letter of the provided string and return the modified
+ * string. Currently disabled: the string is returned unchanged.
+ */
+function sentenceCase( string )
+{
+	// Capitalization is switched off; keep the crumb text as-is.
+	return string;
+	//var lower = string.toLowerCase();
+	//return lower.substr(0,1).toUpperCase() + lower.substr(1);
+}
+
+/**
+ * Returns an array containing the names of all the directories in the
+ * current document URL
+ */
+function getDirectoriesInURL()
+{
+	var trail = document.location.pathname.split( PATH_SEPARATOR );
+
+	// check whether last section is a file or a directory
+	var lastcrumb = trail[trail.length-1];
+	for( var i = 0; i < FILE_EXTENSIONS.length; i++ )
+	{
+		if( lastcrumb.indexOf( FILE_EXTENSIONS[i] ) != -1 )
+		{
+			// it is, remove it and send results
+			return trail.slice( 1, trail.length-1 );
+		}
+	}
+
+	// it's not; send the trail unmodified
+	return trail.slice( 1, trail.length );
+}
+
+/* ========================================================================
+	BREADCRUMB FUNCTIONALITY
+   ======================================================================== */
+/**
+ * Return a two-dimensional array describing the breadcrumbs based on the
+ * array of directories passed in.
+ */
+function getBreadcrumbs( dirs )
+{
+	var prefix = "/";
+	var postfix = "/";
+
+	// the array we will return
+	var crumbs = new Array();
+
+	if( dirs != null )
+	{
+		for( var i = 0; i < dirs.length; i++ )
+		{
+			prefix += dirs[i] + postfix;
+			crumbs.push( new Array( dirs[i], prefix ) );
+		}
+	}
+
+	// prepend the PREPREND_CRUMBS
+	if(PREPREND_CRUMBS.length > 0 )
+	{
+		return PREPREND_CRUMBS.concat( crumbs );
+	}
+
+	return crumbs;
+}
+
+/**
+ * Return a string containing a simple text breadcrumb trail based on the
+ * two-dimensional array passed in.
+ */
+function getCrumbTrail( crumbs )
+{
+	var xhtml = DISPLAY_PREPREND;
+
+	for( var i = 0; i < crumbs.length; i++ )
+	{
+		xhtml += '<a href="' + crumbs[i][1] + '" >';
+		xhtml += unescape( crumbs[i][0] ) + '</a>';
+		if( i != (crumbs.length-1) )
+		{
+			xhtml += DISPLAY_SEPARATOR;
+		}
+	}
+
+	xhtml += DISPLAY_POSTPREND;
+
+	return xhtml;
+}
+
+/**
+ * Return a string containing an XHTML breadcrumb trail based on the
+ * two-dimensional array passed in.
+ */
+function getCrumbTrailXHTML( crumbs )
+{
+	var xhtml = '<span class="' + CSS_CLASS_TRAIL  + '">';
+	xhtml += DISPLAY_PREPREND;
+
+	for( var i = 0; i < crumbs.length; i++ )
+	{
+		xhtml += '<a href="' + crumbs[i][1] + '" class="' + CSS_CLASS_CRUMB + '">';
+		xhtml += unescape( crumbs[i][0] ) + '</a>';
+		if( i != (crumbs.length-1) )
+		{
+			xhtml += '<span class="' + CSS_CLASS_SEPARATOR + '">' + DISPLAY_SEPARATOR + '</span>';
+		}
+	}
+
+	xhtml += DISPLAY_POSTPREND;
+	xhtml += '</span>';
+
+	return xhtml;
+}
+
+/* ========================================================================
+	PRINT BREADCRUMB TRAIL
+   ======================================================================== */
+
+// check if we're local; if so, only print the PREPREND_CRUMBS
+if( document.location.href.toLowerCase().indexOf( "http://" ) == -1 )
+{
+	document.write( getCrumbTrail( getBreadcrumbs() ) );
+}
+else
+{
+	document.write( getCrumbTrail( getBreadcrumbs( getDirectoriesInURL() ) ) );
+}
+
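The `@skinconfig.trail.*@` tokens above appear to be Apache Forrest build-time placeholders; the guards keep a trail link only when its name is non-empty and no longer starts with "@", i.e. was actually substituted. A small sketch of that check, with illustrative values:

    // "Hadoop" stands in for a substituted placeholder; the literal
    // "@skinconfig.trail.link2.name@" for one the build left untouched.
    function isUsableTrailLink(name) {
      return name != "" && name.indexOf("@") != 0;
    }
    // isUsableTrailLink("Hadoop")                        --> true
    // isUsableTrailLink("")                              --> false
    // isUsableTrailLink("@skinconfig.trail.link2.name@") --> false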

+ 166 - 0
docs/cn/skin/fontsize.js

@@ -0,0 +1,166 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+function init() 
+{ //embedded in the doc
+  //ndeSetTextSize();
+}
+
+// Returns true when the browser lacks basic DOM support
+// (no document.getElementsByTagName).
+function checkBrowser(){
+  return !document.getElementsByTagName;
+}
+
+
+function ndeSetTextSize(chgsize,rs) 
+{
+  var startSize;
+  var newSize;
+
+  if (checkBrowser())
+  {
+    return; // no DOM support; leave the page alone
+  }
+
+  startSize = parseInt(ndeGetDocTextSize());
+
+  if (!startSize)
+  {
+    startSize = 16;
+  }
+
+  switch (chgsize)
+  {
+  case 'incr':
+    newSize = startSize + 2;
+    break;
+
+  case 'decr':
+    newSize = startSize - 2;
+    break;
+
+  case 'reset':
+    if (rs) {newSize = rs;} else {newSize = 16;}
+    break;
+
+  default:
+    try{
+      newSize = parseInt(ndeReadCookie("nde-textsize"));
+    }
+    catch(e){
+      alert(e);
+    }
+    
+    if (!newSize || isNaN(newSize))
+    {
+      newSize = startSize;
+    }
+    break;
+
+  }
+
+  if (newSize < 10) 
+  {
+    newSize = 10;
+  }
+
+  newSize += 'px';
+
+  document.getElementsByTagName('html')[0].style.fontSize = newSize;
+  document.getElementsByTagName('body')[0].style.fontSize = newSize;
+
+  ndeCreateCookie("nde-textsize", newSize, 365);
+}
+
+function ndeGetDocTextSize() 
+{
+  if (checkBrowser())
+  {
+    return 0; // no DOM support; report no known size
+  }
+
+  var size = 0;
+  var body = document.getElementsByTagName('body')[0];
+
+  if (body.style && body.style.fontSize)
+  {
+    size = body.style.fontSize;
+  }
+  else if (typeof(getComputedStyle) != 'undefined')
+  {
+    size = getComputedStyle(body,'').getPropertyValue('font-size');
+  }
+  else if (body.currentStyle)
+  {
+   size = body.currentStyle.fontSize;
+  }
+
+  // Fix IE quirk: a percentage font-size has no usable pixel value,
+  // so report "no known size" instead of returning it.
+  if( isNaN(size)){
+    if(size.substring(size.length-1)=="%"){
+      return 0;
+    }
+  }
+
+  return size;
+
+}
+
+
+
+function ndeCreateCookie(name,value,days) 
+{
+  var cookie = name + "=" + value + ";";
+
+  if (days) 
+  {
+    var date = new Date();
+    date.setTime(date.getTime()+(days*24*60*60*1000));
+    cookie += " expires=" + date.toGMTString() + ";";
+  }
+  cookie += " path=/";
+
+  document.cookie = cookie;
+
+}
+
+function ndeReadCookie(name) 
+{
+  var nameEQ = name + "=";
+  var ca = document.cookie.split(';');
+
+  for(var i = 0; i < ca.length; i++) 
+  {
+    var c = ca[i];
+    while (c.charAt(0) == ' ') 
+    {
+      c = c.substring(1, c.length);
+    }
+
+    var ctest = c.substring(0, name.length);
+
+    if(ctest == name){
+      return c.substring(nameEQ.length,c.length);
+    }
+  }
+  return null;
+}
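A hedged usage sketch for the resizing functions above; the control markup is hypothetical (the skin's generated pages wire up equivalent links):

    // Hypothetical font-size controls; each click grows, shrinks, or resets
    // the page font and persists the choice in the "nde-textsize" cookie.
    document.write('<a href="#" onclick="ndeSetTextSize(\'incr\'); return false;">A+</a> ');
    document.write('<a href="#" onclick="ndeSetTextSize(\'decr\'); return false;">A-</a> ');
    document.write('<a href="#" onclick="ndeSetTextSize(\'reset\', 16); return false;">Reset</a>');
    // Called with no arguments, it falls through to the default case and
    // restores whatever size was saved in the cookie.
    ndeSetTextSize();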

+ 40 - 0
docs/cn/skin/getBlank.js

@@ -0,0 +1,40 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * getBlank script - when included in a html file and called from a form text field, will set the value of this field to ""
+ * if the text value is still the standard value.
+ * getPrompt script - when included in a html file and called from a form text field, will set the value of this field to the prompt
+ * if the text value is empty.
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="getBlank.js"></script>
+ * <input type="text" id="query" value="Search the site:" onFocus="getBlank (this, 'Search the site:');" onBlur="getBlank (this, 'Search the site:');"/>
+ */
+function getBlank (form, stdValue){
+	if (form.value == stdValue){
+		form.value = '';
+	}
+	return true;
+}
+function getPrompt (form, stdValue){
+	if (form.value == ''){
+		form.value = stdValue;
+	}
+	return true;
+}

+ 45 - 0
docs/cn/skin/getMenu.js

@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * This script, when included in a html file, can be used to make collapsible menus
+ *
+ * Typical usage:
+ * <script type="text/javascript" language="JavaScript" src="menu.js"></script>
+ */
+
+if (document.getElementById){ 
+  document.write('<style type="text/css">.menuitemgroup{display: none;}</style>')
+}
+
+
+function SwitchMenu(obj, thePath)
+{
+  var open = 'url("'+thePath + 'images/chapter_open.gif")';
+  var close = 'url("'+thePath + 'images/chapter.gif")';
+  if(document.getElementById)  {
+    var el = document.getElementById(obj);
+    var title = document.getElementById(obj+'Title');
+
+    if(el.style.display != "block"){ 
+      title.style.backgroundImage = open;
+      el.style.display = "block";
+    }else{
+      title.style.backgroundImage = close;
+      el.style.display = "none";
+    }
+  }// end -  if(document.getElementById) 
+}//end - function SwitchMenu(obj)
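SwitchMenu assumes a pair of elements whose ids differ only by a "Title" suffix; a minimal sketch of compatible markup (the ids, classes, and link text are illustrative, not taken from the generated pages):

    // Hypothetical collapsible section: clicking the title toggles the group
    // and swaps the chapter / chapter_open background icon.
    document.write(
      '<div id="menu_1Title" class="menutitle" onclick="SwitchMenu(\'menu_1\', \'\')">Overview</div>' +
      '<div id="menu_1" class="menuitemgroup">' +
      '<div class="menuitem"><a href="index.html">Index</a></div>' +
      '</div>');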

+ 1 - 0
docs/cn/skin/images/README.txt

@@ -0,0 +1 @@
+The images in this directory are used if the current skin lacks them.

BIN
docs/cn/skin/images/add.jpg


BIN
docs/cn/skin/images/built-with-forrest-button.png


BIN
docs/cn/skin/images/chapter.gif


BIN
docs/cn/skin/images/chapter_open.gif


+ 92 - 0
docs/cn/skin/images/corner-imports.svg.xslt

@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:param name="orientation-tb"/>
+  <xsl:param name="orientation-lr"/>
+  <xsl:param name="size"/>
+  <xsl:param name="bg-color-name"/>
+  <xsl:param name="stroke-color-name"/>
+  <xsl:param name="fg-color-name"/>
+<!-- if not all colors are present, don't even try to render the corners -->
+  <xsl:variable name="isize">
+    <xsl:choose>
+      <xsl:when test="$bg-color-name and $stroke-color-name and $fg-color-name">
+        <xsl:value-of select="$size"/>
+      </xsl:when>
+      <xsl:otherwise>0</xsl:otherwise>
+    </xsl:choose>
+  </xsl:variable>
+  <xsl:variable name="smallersize" select="number($isize)-1"/>
+  <xsl:variable name="biggersize" select="number($isize)+1"/>
+  <xsl:variable name="bg">
+    <xsl:if test="skinconfig/colors/color[@name=$bg-color-name]">fill:<xsl:value-of select="skinconfig/colors/color[@name=$bg-color-name]/@value"/>;</xsl:if>
+  </xsl:variable>
+  <xsl:variable name="fill">
+    <xsl:if test="skinconfig/colors/color[@name=$stroke-color-name]">fill:<xsl:value-of select="skinconfig/colors/color[@name=$stroke-color-name]/@value"/>;</xsl:if>
+  </xsl:variable>
+  <xsl:variable name="stroke">
+    <xsl:if test="skinconfig/colors/color[@name=$fg-color-name]">stroke:<xsl:value-of select="skinconfig/colors/color[@name=$fg-color-name]/@value"/>;</xsl:if>
+  </xsl:variable>
+  <xsl:template match="skinconfig">
+    <svg width="{$isize}" height="{$isize}">
+<!-- background-->
+      <rect x="-1" y="-1" width="{$biggersize}" height="{$biggersize}" style="{$bg}stroke-width:0"/>
+<!-- 0,0 0,-4 4,0 4,-4-->
+      <xsl:variable name="flip-tb-scale">
+        <xsl:choose>
+          <xsl:when test="$orientation-tb='t'">1</xsl:when>
+          <xsl:otherwise>-1</xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="flip-lr-scale">
+        <xsl:choose>
+          <xsl:when test="$orientation-lr='l'">1</xsl:when>
+          <xsl:otherwise>-1</xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="flip-tb-translate">
+        <xsl:choose>
+          <xsl:when test="$orientation-tb='t'">0</xsl:when>
+          <xsl:otherwise>-<xsl:value-of select="$isize" />
+          </xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="flip-lr-translate">
+        <xsl:choose>
+          <xsl:when test="$orientation-lr='l'">0</xsl:when>
+          <xsl:otherwise>-<xsl:value-of select="$isize" />
+          </xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+<!-- flip transform -->
+      <g transform="scale({$flip-lr-scale},{$flip-tb-scale}) translate({$flip-lr-translate}, {$flip-tb-translate})">
+        <xsl:call-template name="figure" />
+      </g>
+    </svg>
+  </xsl:template>
+  <xsl:template name="figure">
+<!-- Just change shape here -->
+    <g transform="translate(0.5 0.5)">
+      <ellipse cx="{$smallersize}" cy="{$smallersize}" rx="{$smallersize}" ry="{$smallersize}"
+				 style="{$fill}{$stroke}stroke-width:1"/>
+    </g>
+<!-- end -->
+  </xsl:template>
+  <xsl:template match="*"></xsl:template>
+  <xsl:template match="text()"></xsl:template>
+</xsl:stylesheet>

BIN
docs/cn/skin/images/current.gif


+ 28 - 0
docs/cn/skin/images/dc.svg.xslt

@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:import href="corner-imports.svg.xslt" />
+<!-- Diagonal 45 degrees corner -->
+  <xsl:template name="figure">
+    <xsl:variable name="biggersize" select="number($size)+number($size)"/>
+    <g transform="translate(0 0.5)">
+      <polygon points="0,{$size} {$size},0 {$biggersize},0 {$biggersize},{$biggersize} 0,{$biggersize}"
+                    style="{$fill}{$stroke}stroke-width:1"/>
+    </g>
+  </xsl:template>
+</xsl:stylesheet>

BIN
docs/cn/skin/images/error.png


BIN
docs/cn/skin/images/external-link.gif


BIN
docs/cn/skin/images/fix.jpg


BIN
docs/cn/skin/images/forrest-credit-logo.png


BIN
docs/cn/skin/images/hack.jpg


BIN
docs/cn/skin/images/header_white_line.gif


BIN
docs/cn/skin/images/info.png


BIN
docs/cn/skin/images/instruction_arrow.png


BIN
docs/cn/skin/images/label.gif


BIN
docs/cn/skin/images/page.gif


BIN
docs/cn/skin/images/pdfdoc.gif


BIN
docs/cn/skin/images/poddoc.png


+ 55 - 0
docs/cn/skin/images/poddoc.svg.xslt

@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<svg width="20pt" height="20pt"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:xlink="http://www.w3.org/1999/xlink">
+  <defs
+     id="defs550">
+    <linearGradient id="gray2white">
+      <stop style="stop-color:#7f7f7f;stop-opacity:1;" offset="0.000000"/>
+      <stop style="stop-color:#ffffff;stop-opacity:1;" offset="1.000000"/>
+    </linearGradient>
+    <linearGradient id="pageshade" xlink:href="#gray2white"
+       x1="0.95" y1="0.95"
+       x2="0.40" y2="0.20"
+       gradientUnits="objectBoundingBox" spreadMethod="pad" />
+    <path d="M 0 0 L 200 0" style="stroke:#000000;stroke-width:1pt;" id="hr"/>
+  </defs>
+  <g transform="scale(0.08)">
+    <g transform="translate(40, 0)">
+      <rect width="230" height="300" x="0" y="0"
+            style="fill:url(#pageshade);fill-rule:evenodd;
+            stroke:#000000;stroke-width:1.25;"/>
+      <g transform="translate(15, 60)">
+        <use xlink:href="#hr" x="0" y="0"/>
+        <use xlink:href="#hr" x="0" y="60"/>
+        <use xlink:href="#hr" x="0" y="120"/>
+        <use xlink:href="#hr" x="0" y="180"/>
+      </g>
+    </g>
+    <g transform="translate(0,70),scale(1.1,1.6)">
+      <rect width="200" height="100" x="0" y="0"
+         style="fill:#ff0000;fill-rule:evenodd;
+                stroke:#000000;stroke-width:2.33903;"/>
+      <text x="20" y="75"
+            style="stroke:#ffffff;stroke-width:1.0;
+                   font-size:72;font-weight:normal;fill:#ffffff;
+                   font-family:Arial;text-anchor:start;">POD</text>
+    </g>
+  </g>
+</svg>

BIN
docs/cn/skin/images/printer.gif


BIN
docs/cn/skin/images/rc-b-l-15-1body-2menu-3menu.png


BIN
docs/cn/skin/images/rc-b-r-15-1body-2menu-3menu.png


BIN
docs/cn/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png


BIN
docs/cn/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png


BIN
docs/cn/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png


BIN
docs/cn/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png


BIN
docs/cn/skin/images/rc-t-r-15-1body-2menu-3menu.png


BIN
docs/cn/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png


BIN
docs/cn/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png


BIN
docs/cn/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png


+ 27 - 0
docs/cn/skin/images/rc.svg.xslt

@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:import href="corner-imports.svg.xslt" />
+<!-- Rounded corner -->
+  <xsl:template name="figure">
+    <g transform="translate(0.5 0.5)">
+      <ellipse cx="{$smallersize}" cy="{$smallersize}" rx="{$smallersize}" ry="{$smallersize}"
+				 style="{$fill}{$stroke}stroke-width:1"/>
+    </g>
+  </xsl:template>
+</xsl:stylesheet>

BIN
docs/cn/skin/images/remove.jpg


BIN
docs/cn/skin/images/rss.png


BIN
docs/cn/skin/images/spacer.gif


BIN
docs/cn/skin/images/success.png


BIN
docs/cn/skin/images/txtdoc.png


+ 55 - 0
docs/cn/skin/images/txtdoc.svg.xslt

@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<svg width="20pt" height="20pt"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:xlink="http://www.w3.org/1999/xlink">
+  <defs
+     id="defs550">
+    <linearGradient id="gray2white">
+      <stop style="stop-color:#7f7f7f;stop-opacity:1;" offset="0.000000"/>
+      <stop style="stop-color:#ffffff;stop-opacity:1;" offset="1.000000"/>
+    </linearGradient>
+    <linearGradient id="pageshade" xlink:href="#gray2white"
+       x1="0.95" y1="0.95"
+       x2="0.40" y2="0.20"
+       gradientUnits="objectBoundingBox" spreadMethod="pad" />
+    <path d="M 0 0 L 200 0" style="stroke:#000000;stroke-width:1pt;" id="hr"/>
+  </defs>
+  <g transform="scale(0.08)">
+    <g transform="translate(40, 0)">
+      <rect width="230" height="300" x="0" y="0"
+            style="fill:url(#pageshade);fill-rule:evenodd;
+            stroke:#000000;stroke-width:1.25;"/>
+      <g transform="translate(15, 60)">
+        <use xlink:href="#hr" x="0" y="0"/>
+        <use xlink:href="#hr" x="0" y="60"/>
+        <use xlink:href="#hr" x="0" y="120"/>
+        <use xlink:href="#hr" x="0" y="180"/>
+      </g>
+    </g>
+    <g transform="translate(0,70),scale(1.1,1.6)">
+      <rect width="200" height="100" x="0" y="0"
+         style="fill:#ff0000;fill-rule:evenodd;
+                stroke:#000000;stroke-width:2.33903;"/>
+      <text x="20" y="75"
+            style="stroke:#ffffff;stroke-width:1.0;
+                   font-size:72;font-weight:normal;fill:#ffffff;
+                   font-family:Arial;text-anchor:start;">TXT</text>
+    </g>
+  </g>
+</svg>

BIN
docs/cn/skin/images/update.jpg


BIN
docs/cn/skin/images/valid-html401.png


BIN
docs/cn/skin/images/vcss.png


Some files were not shown because too many files changed in this diff