فهرست منبع

Merge -r601498:601818 from trunk to 0.15 branch. Fixes: HADOOP-2160 and HADOOP-1327.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.15@601849 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 17 سال پیش
والد
کامیت
41d1d927e1
51 فایلهای تغییر یافته به همراه 1940 افزوده شده و 4402 حذف شده
  1. 7 0
      CHANGES.txt
  2. 9 1
      build.xml
  3. 0 1
      docs/.htaccess
  4. 12 39
      docs/cluster_setup.html
  5. 0 265
      docs/credits.html
  6. 0 261
      docs/credits.pdf
  7. 0 44
      docs/doap.rdf
  8. 0 248
      docs/documentation.html
  9. 0 196
      docs/documentation.pdf
  10. 12 39
      docs/hdfs_design.html
  11. 37 141
      docs/index.html
  12. 9 37
      docs/index.pdf
  13. 0 220
      docs/irc.html
  14. 0 94
      docs/irc.pdf
  15. 0 223
      docs/issue_tracking.html
  16. 0 126
      docs/issue_tracking.pdf
  17. 17 112
      docs/linkmap.html
  18. 12 12
      docs/linkmap.pdf
  19. BIN
      docs/logos/elephant_rgb.jpg
  20. BIN
      docs/logos/elephant_rgb.pdf
  21. BIN
      docs/logos/elephant_rgb.png
  22. BIN
      docs/logos/hadoop+elephant_rgb.jpg
  23. BIN
      docs/logos/hadoop+elephant_rgb.pdf
  24. BIN
      docs/logos/hadoop+elephant_rgb.png
  25. BIN
      docs/logos/hadoop_rgb.jpg
  26. BIN
      docs/logos/hadoop_rgb.pdf
  27. BIN
      docs/logos/hadoop_rgb.png
  28. 0 318
      docs/mailing_lists.html
  29. 0 63
      docs/mailing_lists.pdf
  30. 12 39
      docs/mapred_tutorial.html
  31. 13 40
      docs/quickstart.html
  32. 50 50
      docs/quickstart.pdf
  33. 0 322
      docs/releases.html
  34. 0 416
      docs/releases.pdf
  35. 831 0
      docs/streaming.html
  36. 347 0
      docs/streaming.pdf
  37. 0 275
      docs/version_control.html
  38. 0 384
      docs/version_control.pdf
  39. 0 1
      src/docs/src/documentation/content/.htaccess
  40. 0 32
      src/docs/src/documentation/content/xdocs/credits.xml
  41. 0 28
      src/docs/src/documentation/content/xdocs/documentation.xml
  42. 20 79
      src/docs/src/documentation/content/xdocs/index.xml
  43. 0 23
      src/docs/src/documentation/content/xdocs/irc.xml
  44. 0 22
      src/docs/src/documentation/content/xdocs/issue_tracking.xml
  45. 0 72
      src/docs/src/documentation/content/xdocs/mailing_lists.xml
  46. 1 1
      src/docs/src/documentation/content/xdocs/quickstart.xml
  47. 0 102
      src/docs/src/documentation/content/xdocs/releases.xml
  48. 7 20
      src/docs/src/documentation/content/xdocs/site.xml
  49. 542 0
      src/docs/src/documentation/content/xdocs/streaming.xml
  50. 2 1
      src/docs/src/documentation/content/xdocs/tabs.xml
  51. 0 55
      src/docs/src/documentation/content/xdocs/version_control.xml

+ 7 - 0
CHANGES.txt

@@ -15,6 +15,13 @@ Branch 0.15 (unreleased)
     HDFS but not the default filesystem.  HDFS paths returned by the
     listStatus() method are now fully-qualified.  (cutting)
 
+  IMPROVEMENTS
+
+    HADOOP-2160.  Remove project-level, non-user documentation from
+    releases, since it's now maintained in a separate tree.
+
+    HADOOP-1327.  Add user documentation for streaming.
+
 
 Release 0.15.1 - 2007-11-27
 

+ 9 - 1
build.xml

@@ -602,6 +602,13 @@
   <!-- ================================================================== -->
   <!-- Documentation                                                      -->
   <!-- ================================================================== -->
+  <target name="docs">
+    <exec dir="src/docs" executable="forrest" failonerror="true" />
+    <copy todir="docs/">
+      <fileset dir="src/docs/build/site/" />
+    </copy>
+  </target>
+
   <target name="javadoc" depends="default-doc">
     <mkdir dir="${build.javadoc}"/>
     <javadoc
@@ -754,7 +761,8 @@
   <!-- Clean.  Delete the build files, and their directories              -->
   <!-- ================================================================== -->
   <target name="clean" depends="clean-contrib">
-  	<delete dir="${build.dir}"/>
+    <delete dir="${build.dir}"/>
+    <delete dir="src/docs/build"/>
   </target>
 
   <!-- ================================================================== -->

+ 0 - 1
docs/.htaccess

@@ -1 +0,0 @@
-RedirectMatch Permanent ^/hadoop/about(.*) http://lucene.apache.org/hadoop/index$1

+ 12 - 39
docs/cluster_setup.html

@@ -60,12 +60,15 @@
     |start Tabs
     +-->
 <ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 <li>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
 </ul>
 <!--+
     |end Tabs
@@ -99,25 +102,10 @@ document.write("Last Published: " + document.lastModified);
     |start Menu
     +-->
 <div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
-<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
 <div class="menuitem">
-<a href="documentation.html">Overview</a>
+<a href="index.html">Overview</a>
 </div>
 <div class="menuitem">
 <a href="quickstart.html">Quickstart</a>
@@ -132,6 +120,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 </div>
 <div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 </div>
 <div class="menuitem">
@@ -141,25 +132,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 </div>
 <div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
 </div>
 </div>
 <div id="credit"></div>

+ 0 - 265
docs/credits.html

@@ -1,265 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Hadoop credits</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Project</div>
-<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menupage">
-<div class="menupagetitle">Credits</div>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="credits.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Hadoop credits</h1>
-<div id="minitoc-area">
-<ul class="minitoc">
-<li>
-<a href="#Committers">Committers</a>
-</li>
-</ul>
-</div>
-
-
-<a name="N1000C"></a><a name="Committers"></a>
-<h2 class="h3">Committers</h2>
-<div class="section">
-<ul>
-  
-<li>Andrzej Bialecki</li>
-  
-<li>Mike Cafarella</li>
-  
-<li>
-<a href="http://blog.lucene.com/">Doug Cutting</a>
-</li>
-  
-<li>
-<a href="http://people.apache.org/~nigel">Nigel Daley</a>
-</li>
-  
-<li>Jim Kellerman (contrib/hbase)</li>
-  
-<li>
-<a href="http://people.apache.org/~omalley">Owen O'Malley</a>
-</li>
-  
-<li>
-<a href="http://weblogs.java.net/blog/tomwhite/">Tom White</a>
-</li>
-  
-<li>
-<a href="http://people.apache.org/~acmurthy">Arun C Murthy</a>
-</li>
-  
-<li>
-<a href="http://people.apache.org/~ddas">Devaraj Das</a>
-</li>
-  
-<li>
-<a href="http://people.apache.org/~enis">Enis Soztutar</a>
-</li>
-  
-<li>
-<a href="http://people.apache.org/~taton">Christophe Taton</a>
-</li>
-
-</ul>
-</div>
-
-
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

+ 0 - 261
docs/credits.pdf

@@ -1,261 +0,0 @@
-%PDF-1.3
-%ª«¬­
-4 0 obj
-<< /Type /Info
-/Producer (FOP 0.20.5) >>
-endobj
-5 0 obj
-<< /Length 357 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gaua8btc/1%*.g^$6Qk^-;!O#4Yjs/\05kp%])n/8m=*_!93B[Z>%F*JL-*4muO)PH`>]c.L0*!#Mh3(T9aJ!1BDm;!buX($]]L8J_n4Yp=OK<mMBge<AjZZFu%n3%L[LdrA<OKL_K3.6&JM"jcQ7+O0.pXnug41n_`'JR5*Y4#_'l/>^@M!/C]e0dV/G03i&\$pNF"bS28W!,h0bJS[iSe0_\((-;Ro0B_E=0&-aMl6:F_(nSj]qPC];gZ__$#4AU`B,um7QmK=-Q,se(HSQk"O:;_D<ir7d[M5V]_.Or+d;;CQ.2UsIhG<a?:=B>-2ll%h8&X0Th2Mt?.OD&\he$nQq[f-N2/<H.~>
-endstream
-endobj
-6 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 5 0 R
-/Annots 7 0 R
->>
-endobj
-7 0 obj
-[
-8 0 R
-]
-endobj
-8 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 559.666 166.676 547.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 9 0 R
-/H /I
->>
-endobj
-10 0 obj
-<< /Length 1145 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gat=k:NOuI&B=2<'Y;!F!S*@7J&euo(msjqbmXj"_/7]?OP%NlkeO*/(!Dm`8$8?G8NYW(4aj,3=VnF.lq/Ibi#'][U)5I+Jn"L"8B28T$,_E&1g?_n-Np'9=-VJ7a^q_`B2rNcbpiLLB()KRZfi4dgAJEWS]_YIKL!X3it^MU;cFZW(O_;r/=.JGCMob93KABE]VUiS^tBGEr1:jL,p]ck37$dL?h;/C]u\We\c8)*<C0YOY^-lr_UMGdZ_+,T:&9s;B!k7'@re]U(c$;9eiuN/hP-t2E@nGYBT3;m.R_\f<[\12&j;qX``OnI<@J_t:Hd;Ja%lU!@_2o>q^`,AU[V[m\Q;$O/>NOeBP"W/?KgLZWj7"l^t-?<jU0;OA8;qXQT]-M^ld1L=ujPDZ\;OFkH1Z*;0pUD[X\pVd9ushg5aP3cm=/g)(X[u^SFYAJoHtKg"NpW#A=]JY1)0]DGT\Y]IL8"ZfPK(0t4(,FqE8\Ajp*s\[!5kN>BlbU'VgMc%']/cMpZe&+4J!A`7qL4tNB$$Gc?^10hV5F-G5gqVQG,R_$rS5;WrShSP?6qVR4iE2b1'Upl1oBBuX$#HdBe/=VX)LBZ=:!C=!A,$;WbgE_*9?^Fo,-MbgJ$`6*4c/=:,dB&D9'kB80oXrt6O),VDd<l?c9]uBHK[o*0ORGndH!]QC87B9/AVa4'Y^k@VT]e4pD5(*W]e-;H[$Dp4"h,+ADQL`ie>cXn\b/4*B>+,3QKaI'[p>H!X"jH)?1lYM>W,/-h>?Kc3\o1/,YqoikX#W$)Xfj)5o/"<_2#-#*,-mqS#bpNSo$gX5-+,51&.GslkbaA5_B5f9/VQ4kY:TVg/_oJ`S3eel.9^FWOnfk4dTI43,^C[\[=&VA[\Rqa/'(KkS('$2C`-;JF+)uACp"<!j1*5dEA)W<KXca;P\9;mr"j:k(FsNi-e%:SbFb-(WE0J)Nk_K.4@6f0?lV[p(#ZJIUA4>@`dHt.^hJ?O/iNc\8p4+aT9?p6Q09BdLe^?$e+.`Nbf??/]*B;Qs*fZi@lJ6$A/j$6AaHlipfg4/XFqiT%\V;;,7%H`j,0:g!]Z7q."a.?/H>,eIt'=I,q<[k4>t&MB$ig<\(r;YrA5]fpM?>R:G^A57#.K$3~>
-endstream
-endobj
-11 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 10 0 R
-/Annots 12 0 R
->>
-endobj
-12 0 obj
-[
-13 0 R
-14 0 R
-15 0 R
-16 0 R
-17 0 R
-18 0 R
-19 0 R
-20 0 R
-]
-endobj
-13 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 607.266 173.676 595.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://blog.lucene.com/)
-/S /URI >>
-/H /I
->>
-endobj
-14 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 594.066 166.32 582.066 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://people.apache.org/~nigel)
-/S /URI >>
-/H /I
->>
-endobj
-15 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 567.666 184.476 555.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://people.apache.org/~omalley)
-/S /URI >>
-/H /I
->>
-endobj
-16 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 554.466 162.996 542.466 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://weblogs.java.net/blog/tomwhite/)
-/S /URI >>
-/H /I
->>
-endobj
-17 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 541.266 182.664 529.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://people.apache.org/~acmurthy)
-/S /URI >>
-/H /I
->>
-endobj
-18 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 528.066 167.64 516.066 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://people.apache.org/~ddas)
-/S /URI >>
-/H /I
->>
-endobj
-19 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 514.866 172.332 502.866 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://people.apache.org/~enis)
-/S /URI >>
-/H /I
->>
-endobj
-20 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 501.666 191.664 489.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://people.apache.org/~taton)
-/S /URI >>
-/H /I
->>
-endobj
-22 0 obj
-<<
- /Title (\376\377\0\61\0\40\0\103\0\157\0\155\0\155\0\151\0\164\0\164\0\145\0\162\0\163)
- /Parent 21 0 R
- /A 9 0 R
->> endobj
-23 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-24 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F5
-/BaseFont /Times-Roman
-/Encoding /WinAnsiEncoding >>
-endobj
-25 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F1
-/BaseFont /Helvetica
-/Encoding /WinAnsiEncoding >>
-endobj
-26 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica-Oblique
-/Encoding /WinAnsiEncoding >>
-endobj
-27 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F7
-/BaseFont /Times-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-1 0 obj
-<< /Type /Pages
-/Count 2
-/Kids [6 0 R 11 0 R ] >>
-endobj
-2 0 obj
-<< /Type /Catalog
-/Pages 1 0 R
- /Outlines 21 0 R
- /PageMode /UseOutlines
- >>
-endobj
-3 0 obj
-<< 
-/Font << /F3 23 0 R /F5 24 0 R /F1 25 0 R /F2 26 0 R /F7 27 0 R >> 
-/ProcSet [ /PDF /ImageC /Text ] >> 
-endobj
-9 0 obj
-<<
-/S /GoTo
-/D [11 0 R /XYZ 85.0 659.0 null]
->>
-endobj
-21 0 obj
-<<
- /First 22 0 R
- /Last 22 0 R
->> endobj
-xref
-0 28
-0000000000 65535 f 
-0000004393 00000 n 
-0000004458 00000 n 
-0000004550 00000 n 
-0000000015 00000 n 
-0000000071 00000 n 
-0000000519 00000 n 
-0000000639 00000 n 
-0000000664 00000 n 
-0000004673 00000 n 
-0000000799 00000 n 
-0000002037 00000 n 
-0000002160 00000 n 
-0000002236 00000 n 
-0000002411 00000 n 
-0000002593 00000 n 
-0000002778 00000 n 
-0000002968 00000 n 
-0000003154 00000 n 
-0000003335 00000 n 
-0000003517 00000 n 
-0000004736 00000 n 
-0000003700 00000 n 
-0000003837 00000 n 
-0000003950 00000 n 
-0000004060 00000 n 
-0000004168 00000 n 
-0000004284 00000 n 
-trailer
-<<
-/Size 28
-/Root 2 0 R
-/Info 4 0 R
->>
-startxref
-4787
-%%EOF

+ 0 - 44
docs/doap.rdf

@@ -1,44 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl"?>
-<rdf:RDF xml:lang="en"
-         xmlns="http://usefulinc.com/ns/doap#" 
-         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" 
-         xmlns:asfext="http://projects.apache.org/ns/asfext#"
-         xmlns:foaf="http://xmlns.com/foaf/0.1/">
-<!--
-  =======================================================================
-
-   Copyright (c) 2006 The Apache Software Foundation.  
-   All rights reserved.
-
-  =======================================================================
--->
-  <Project rdf:about="http://lucene.apache.org/hadoop/">
-    <created>2006-02-28</created>
-    <license rdf:resource="http://usefulinc.com/doap/licenses/asl20" />
-    <name>Apache Hadoop</name>
-    <homepage rdf:resource="http://lucene.apache.org/hadoop/" />
-    <asfext:pmc rdf:resource="http://lucene.apache.org" />
-    <shortdesc>A distributed computing platform.</shortdesc>
-    <description>Hadoop is a Lucene sub-project that contains the distributed computing platform that was formerly a part of Nutch. This includes the Hadoop Distributed Filesystem (HDFS) and an implementation of MapReduce.</description>
-    <bug-database rdf:resource="http://nagoya.apache.org/jira/browse/HADOOP" />
-    <mailing-list rdf:resource="http://lucene.apache.org/hadoop/mailing_lists.html" />
-    <download-page rdf:resource="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/" />
-    <programming-language>Java</programming-language>
-    <category rdf:resource="http://projects.apache.org/category/database" />
-    <release>
-      <Version>
-        <branch>branch-0.1</branch>
-        <name>nutch-0.1.1</name>
-        <created>2006-04-07</created>
-        <revision>0.1.1</revision>
-      </Version>
-    </release>
-    <repository>
-      <SVNRepository>
-        <location rdf:resource="http://svn.apache.org/repos/asf/lucene/hadoop/"/>
-        <browse rdf:resource="http://svn.apache.org/viewcvs.cgi/lucene/hadoop/"/>
-      </SVNRepository>
-    </repository>
-  </Project>
-</rdf:RDF>

+ 0 - 248
docs/documentation.html

@@ -1,248 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Hadoop Documentation</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
-<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
-<div class="menupage">
-<div class="menupagetitle">Overview</div>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="documentation.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Hadoop Documentation</h1>
-    
-<p>
-    The following documents provide concepts and procedures that will help you 
-    get started using Hadoop. If you have more questions, you can ask the 
-    <a href="mailing_lists.html">mailing list</a> or browse the archives.
-    </p>
-    
-<ul>
-      
-<li>
-<a href="quickstart.html">Hadoop Quickstart</a>
-</li>
-      
-<li>
-<a href="cluster_setup.html">Hadoop Cluster Setup</a>
-</li>
-      
-<li>
-<a href="hdfs_design.html">Hadoop Distributed File System</a>
-</li>
-      
-<li>
-<a href="mapred_tutorial.html">Hadoop Map-Reduce Tutorial</a>
-</li>
-      
-<li>
-<a href="api/index.html">API Docs</a>
-</li>
-      
-<li>
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</li>
-      
-<li>
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</li>
-    
-</ul>
-  
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

+ 0 - 196
docs/documentation.pdf

@@ -1,196 +0,0 @@
-%PDF-1.3
-%ª«¬­
-4 0 obj
-<< /Type /Info
-/Producer (FOP 0.20.5) >>
-endobj
-5 0 obj
-<< /Length 1161 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gat=+9lldX&A@ZcFA-:M<CfZ:A(!2N3n'KDp.Puk%g:'6'5SXPilSs=ps]A"&5Ne]0&$'V/ut8I?<U]R)57TfBXtQBnIo"-D(HV<rF[@Yn-OkX-6ud_d,0a'r[Np=Qfp5JV1*T-'*n>.m5$s7j8C;#LR(.^-q?1t5W_NlFg3&aKs]X8!eqL%0%P"V`Q)r40s4N9S/U!&M-n.8#RfX?TFPpt^L)-7#ATMO$7s9,IV,Zk(e)HuWp!il[l?=d7Y.Kpo92kMl?'We$3rXeML!ptW2^jrNk=rigX^iX=mk8P=>.3WMdiTV7Y1_l&*f9@_q\7oq:j>DYq"GjiB+?4/$R'JQHq88nor0[)&?2[Sd5oZ\VlJ]@GP/>b9f:'TXFn+"VbD.LJ_3/PJhm<Jlk\[%j==_K%ZOUs$e.M_'Tj%Nl!WRe\]#X0W,*YCZE?4pF1r[WB+&HDMN2C'NN0M8[&Y2<JoP5@ukFH`\<a%e0Cf;Un0K,j_BZa=UNpBf^ieB)3b2?ee>6o_:(37dG0\2e,2AC6@kY-0SGO9#Z#ehntQbug,9Z/^!FUS\_OJ<MEI8LSEafI^X0Dj:,Gf2g2gl"27rcgTn*8*X^JN_#"%V2.)li'FbXqO7X:H%A5$nX]&43b>XJM="e^0`GhsFJFX.[5:g&77eITU;pAB@^U)XZ9+)9,irBS/T3NjNT!]dAjZl`HW9'A+MTHTub)a-`Un0p:dAY65g?SK2m[NS%'DlPHI69+Fl2PED.D&A6QP'u8?b+5CJ6F_FLI7=sHl@$'W-r@O#dp$]KViMjZPr!Mg*uCtlViMeCH%#%(i6\"Ea&%>W<\W+F#(kP3HgV#)^Q1I&J,+lG&,I=bN&F-9^7J%tKL6$rLYrjS-Jm?j6@n91T#F:b8$cAd'(!n^%DA3001L.2A$kp!fbtLYLTZ*`NKNtV1d*)4oE0nUJ;2uWot&0d>,c?e&?EQodTQ5XmK`/tNY'ICLJ_HO=^<2q:$>CJ*6o1ub;0)-jD`Zil':2\5#\]JDGTNOX8=OB`[V/GH3UrcY:U07.7]4]l\:,YkKPBVUoqU$IQ#g<7<#%]Q'PJ,>N3P)&oP.Gq5Sk=gbooo`&1Toh-([t#=!b4,$nnP567A138^p^Y@A!XWis^j.(N8aX&LCKC5#CIrrIqX;Xs~>
-endstream
-endobj
-6 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 5 0 R
-/Annots 7 0 R
->>
-endobj
-7 0 obj
-[
-8 0 R
-9 0 R
-10 0 R
-11 0 R
-12 0 R
-13 0 R
-14 0 R
-15 0 R
-]
-endobj
-8 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 376.956 572.6 431.304 560.6 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (mailing_lists.html)
-/S /URI >>
-/H /I
->>
-endobj
-9 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 542.2 198.984 530.2 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (quickstart.html)
-/S /URI >>
-/H /I
->>
-endobj
-10 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 529.0 213.996 517.0 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (cluster_setup.html)
-/S /URI >>
-/H /I
->>
-endobj
-11 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 515.8 263.004 503.8 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (hdfs_design.html)
-/S /URI >>
-/H /I
->>
-endobj
-12 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 502.6 252.636 490.6 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (mapred_tutorial.html)
-/S /URI >>
-/H /I
->>
-endobj
-13 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 489.4 154.992 477.4 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (api/index.html)
-/S /URI >>
-/H /I
->>
-endobj
-14 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 476.2 132.0 464.2 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://wiki.apache.org/lucene-hadoop/)
-/S /URI >>
-/H /I
->>
-endobj
-15 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 463.0 132.0 451.0 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://wiki.apache.org/lucene-hadoop/FAQ)
-/S /URI >>
-/H /I
->>
-endobj
-16 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-17 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F5
-/BaseFont /Times-Roman
-/Encoding /WinAnsiEncoding >>
-endobj
-18 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F1
-/BaseFont /Helvetica
-/Encoding /WinAnsiEncoding >>
-endobj
-19 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica-Oblique
-/Encoding /WinAnsiEncoding >>
-endobj
-1 0 obj
-<< /Type /Pages
-/Count 1
-/Kids [6 0 R ] >>
-endobj
-2 0 obj
-<< /Type /Catalog
-/Pages 1 0 R
- >>
-endobj
-3 0 obj
-<< 
-/Font << /F3 16 0 R /F5 17 0 R /F1 18 0 R /F2 19 0 R >> 
-/ProcSet [ /PDF /ImageC /Text ] >> 
-endobj
-xref
-0 20
-0000000000 65535 f 
-0000003322 00000 n 
-0000003380 00000 n 
-0000003430 00000 n 
-0000000015 00000 n 
-0000000071 00000 n 
-0000001324 00000 n 
-0000001444 00000 n 
-0000001517 00000 n 
-0000001684 00000 n 
-0000001846 00000 n 
-0000002012 00000 n 
-0000002176 00000 n 
-0000002344 00000 n 
-0000002506 00000 n 
-0000002689 00000 n 
-0000002875 00000 n 
-0000002988 00000 n 
-0000003098 00000 n 
-0000003206 00000 n 
-trailer
-<<
-/Size 20
-/Root 2 0 R
-/Info 4 0 R
->>
-startxref
-3542
-%%EOF

+ 12 - 39
docs/hdfs_design.html

@@ -62,12 +62,15 @@
     |start Tabs
     |start Tabs
     +-->
     +-->
 <ul id="tabs">
 <ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 </li>
 <li>
 <li>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
 </li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
 </ul>
 </ul>
 <!--+
 <!--+
     |end Tabs
     |end Tabs
@@ -101,25 +104,10 @@ document.write("Last Published: " + document.lastModified);
     |start Menu
     |start Menu
     +-->
     +-->
 <div id="menu">
 <div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
-<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
 <div class="menuitem">
 <div class="menuitem">
-<a href="documentation.html">Overview</a>
+<a href="index.html">Overview</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
 <a href="quickstart.html">Quickstart</a>
 <a href="quickstart.html">Quickstart</a>
@@ -134,6 +122,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 <a href="api/index.html">API Docs</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
@@ -143,25 +134,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
 </div>
 </div>
 </div>
 </div>
 <div id="credit"></div>
 <div id="credit"></div>

+ 37 - 141
docs/index.html

@@ -5,7 +5,7 @@
 <meta content="Apache Forrest" name="Generator">
 <meta content="Apache Forrest" name="Generator">
 <meta name="Forrest-version" content="0.8">
 <meta name="Forrest-version" content="0.8">
 <meta name="Forrest-skin-name" content="pelt">
 <meta name="Forrest-skin-name" content="pelt">
-<title>Welcome to Hadoop!</title>
+<title>Hadoop Documentation</title>
 <link type="text/css" href="skin/basic.css" rel="stylesheet">
 <link type="text/css" href="skin/basic.css" rel="stylesheet">
 <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
 <link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
 <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
 <link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
@@ -60,12 +60,15 @@
     |start Tabs
     |start Tabs
     +-->
     +-->
 <ul id="tabs">
 <ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 </li>
 <li>
 <li>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
 </li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
 </ul>
 </ul>
 <!--+
 <!--+
     |end Tabs
     |end Tabs
@@ -99,25 +102,10 @@ document.write("Last Published: " + document.lastModified);
     |start Menu
     |start Menu
     +-->
     +-->
 <div id="menu">
 <div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menupage">
+<div class="menupagetitle">Overview</div>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
 <a href="quickstart.html">Quickstart</a>
 <a href="quickstart.html">Quickstart</a>
@@ -132,6 +120,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 <a href="api/index.html">API Docs</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
@@ -141,25 +132,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
 </div>
 </div>
 </div>
 </div>
 <div id="credit">
 <div id="credit">
@@ -184,122 +157,45 @@ document.write("Last Published: " + document.lastModified);
 <a class="dida" href="index.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
 <a class="dida" href="index.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
         PDF</a>
         PDF</a>
 </div>
 </div>
-<h1>Welcome to Hadoop!</h1>
-<div id="minitoc-area">
-<ul class="minitoc">
-<li>
-<a href="#Getting+Started"> Getting Started </a>
-</li>
-<li>
-<a href="#Getting+Involved"> Getting Involved </a>
-</li>
-</ul>
-</div>
+<h1>Hadoop Documentation</h1>
     
     
 <p>
 <p>
-    Hadoop is a software platform that lets one easily write and run
-    applications that process vast amounts of data.</p>
-
-    
-<p>Here's what makes Hadoop especially useful:</p>
+    The following documents provide concepts and procedures that will help you 
+    get started using Hadoop. If you have more questions, you can ask the 
+    <a href="http://lucene.apache.org/hadoop/mailing_lists.html">mailing list</a> or browse the archives.
+    </p>
     
     
 <ul>
 <ul>
       
       
 <li>
 <li>
-<strong>Scalable:</strong>
-      Hadoop can reliably store and process petabytes.</li>
+<a href="quickstart.html">Hadoop Quickstart</a>
+</li>
       
       
 <li>
 <li>
-<strong>Economical:</strong>
-      It distributes the data and processing across clusters of
-      commonly available computers. These clusters can number into the
-      thousands of nodes.</li>
+<a href="cluster_setup.html">Hadoop Cluster Setup</a>
+</li>
       
       
 <li>
 <li>
-<strong>Efficient:</strong>
-      By distributing the data, Hadoop can process it in parallel on
-      the nodes where the data is located. This makes it extremely
-      rapid.</li>
+<a href="hdfs_design.html">Hadoop Distributed File System</a>
+</li>
       
       
 <li>
 <li>
-<strong>Reliable:</strong>
-      Hadoop automatically maintains multiple copies of data and
-      automatically redeploys computing tasks based on failures.</li>
-    
-</ul>
-    
-<p>
-    Hadoop implements <a href="http://wiki.apache.org/lucene-hadoop/HadoopMapReduce">MapReduce</a>,
-    using the Hadoop Distributed File System (<a href="hdfs_design.html"><acronym title="Hadoop Distributed File System">HDFS</acronym></a>) (see figure below.)  MapReduce divides
-    applications into many small blocks of work.  HDFS creates
-    multiple replicas of data blocks for reliability, placing them on
-    compute nodes around the cluster.  MapReduce can then process the
-    data where it is located.
-    </p>
-
-    
-<p>Hadoop has been demonstrated on clusters with 2000 nodes.
-    The current design target is 10,000 node clusters.</p>
-
-    
-<p>Hadoop is a <a href="http://lucene.apache.org/">Lucene</a> sub-project
-    that contains the distributed computing platform that was
-    formerly a part of <a href="http://lucene.apache.org/nutch/">Nutch</a>. 
-    </p>
-
-    
-<p>For more information about Hadoop, please see the <a href="http://wiki.apache.org/lucene-hadoop/">Hadoop wiki.</a>
-</p>     
-
-
-    
-<div id="" style="text-align: center;">
-<img id="" class="figure" alt="architecture" src="images/architecture.gif"></div>
-
-    
-<a name="N1004E"></a><a name="Getting+Started"></a>
-<h2 class="h3"> Getting Started </h2>
-<div class="section">
-<p>
-      The Hadoop project plans to scale Hadoop up to handling thousands of computers. However, to begin with you can start by installing in on a single machine or a very small cluster.
-      </p>
-<ol>
-        
-<li>
-<a href="documentation.html">Learn about</a> Hadoop by reading the documentation.</li>
-        
-<li>
-<a href="releases.html">Download</a> Hadoop from the release page.</li>
-        
-<li>Hadoop <a href="quickstart.html">Quickstart</a>.</li>
-        
+<a href="mapred_tutorial.html">Hadoop Map-Reduce Tutorial</a>
+</li>
+      
 <li>
 <li>
-<a href="cluster_setup.html">Hadoop Cluster Setup</a>.</li>
-        
+<a href="api/index.html">API Docs</a>
+</li>
+      
 <li>
 <li>
-<a href="mailing_lists.html">Discuss it</a> on the mailing list.</li>
+<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
+</li>
       
       
-</ol>
-</div>
-
+<li>
+<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
+</li>
     
     
-<a name="N1007A"></a><a name="Getting+Involved"></a>
-<h2 class="h3"> Getting Involved </h2>
-<div class="section">
-<p>
-      Hadoop is an open source volunteer project under the Apache Software Foundation. We encourage you to learn about the project and contribute your expertise. Here are some starter links:
-      </p>
-<ol>
-        
-<li>See our <a href="http://wiki.apache.org/lucene-hadoop/HowToContribute">How to Contribute to Hadoop</a> page.</li>
-        
-<li>Give us <a href="issue_tracking.html">feedback</a>: What can we do better?</li>
-        
-<li>Join the <a href="mailing_lists.html">mailing list</a>: Meet the community.</li>
-      
-</ol>
-</div>
-
+</ul>
   
   
 </div>
 </div>
 <!--+
 <!--+

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 9 - 37
docs/index.pdf


+ 0 - 220
docs/irc.html

@@ -1,220 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Hadoop IRC Channel</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.3', 'skin/')" id="menu_selected_1.3Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Developers</div>
-<div id="menu_selected_1.3" class="selectedmenuitemgroup" style="display: block;">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menupage">
-<div class="menupagetitle">IRC Channel</div>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="irc.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Hadoop IRC Channel</h1>
-
-    
-<p>There is an IRC channel dedicated to hadoop at <strong>irc.freenode.org</strong>. 
-    The name of the channel is <strong>#hadoop</strong>.</p> 
-    
-    
-<p>
-      The IRC channel can be used for online discussion about hadoop related stuff, but developers should be careful to transfer all the official decisions or useful discussions to the issue tracking system.
-    </p>  
-
-  
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

+ 0 - 94
docs/irc.pdf

@@ -1,94 +0,0 @@
-%PDF-1.3
-%ª«¬­
-4 0 obj
-<< /Type /Info
-/Producer (FOP 0.20.5) >>
-endobj
-5 0 obj
-<< /Length 620 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gasam9lJf0&A@sB+SE%&>(SK]lXR58eFqJE\$QBl@?'Q+/_I&nmu,f`0NN@tb*(!Ok2X,mc[D#54qrf.9MhK?14<\&cp#VbM,0/sBs2%M(ZcDi#P>P%,[0Ak2s:@G[de2*q:dNQ>pl^Rr=/U[RP3&I7ld,gY8m,5_BiWs_kp@=9eQ3_ZnW`+Kh^lSS/KBocEbitejqNo2JimUXh32\c]3e4'WX/A(6maGdrgtr9<>g)f!PiH^]+O-7OYDg>MJ0P4;(E)M??+=KDa]92hWQHXtlA5/fA7opihVt=0p21fEHQ22T'0[[:<6F$EEj823P26:V^cTY\r0?+GfEE"]nnAj;<Jg^m0.bj)D]_?M3X(#fEV6H]STADUl)Yh,3ueOa7Vs$h_lWZ#;9oP/8_4p6WC2nA5C3!):l8<snE'2U[C]X%P5T!<*Y1=;5fk3%5WGACI[[^+cRFbjuHc(Zpj1r+549B_i>GF]dAZA!?jTNKKE=:+BLg`(i5tVAsl8FS7T(T>hD$kKEYRdY`#uI<*VQ?#im\Qre@mSNFQ1ZgWW*FFI&X-U\9?&j!Mb%.9-B6N#[(iK*J9kJ<.d'giZC"1,Nt1H<>`!3;djZYBO6^Toje-N~>
-endstream
-endobj
-6 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 5 0 R
->>
-endobj
-7 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-8 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F5
-/BaseFont /Times-Roman
-/Encoding /WinAnsiEncoding >>
-endobj
-9 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F1
-/BaseFont /Helvetica
-/Encoding /WinAnsiEncoding >>
-endobj
-10 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica-Oblique
-/Encoding /WinAnsiEncoding >>
-endobj
-11 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F7
-/BaseFont /Times-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-1 0 obj
-<< /Type /Pages
-/Count 1
-/Kids [6 0 R ] >>
-endobj
-2 0 obj
-<< /Type /Catalog
-/Pages 1 0 R
- >>
-endobj
-3 0 obj
-<< 
-/Font << /F3 7 0 R /F5 8 0 R /F1 9 0 R /F2 10 0 R /F7 11 0 R >> 
-/ProcSet [ /PDF /ImageC /Text ] >> 
-endobj
-xref
-0 12
-0000000000 65535 f 
-0000001441 00000 n 
-0000001499 00000 n 
-0000001549 00000 n 
-0000000015 00000 n 
-0000000071 00000 n 
-0000000782 00000 n 
-0000000888 00000 n 
-0000001000 00000 n 
-0000001109 00000 n 
-0000001216 00000 n 
-0000001332 00000 n 
-trailer
-<<
-/Size 12
-/Root 2 0 R
-/Info 4 0 R
->>
-startxref
-1669
-%%EOF

+ 0 - 223
docs/issue_tracking.html

@@ -1,223 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Hadoop Issue Tracking</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.3', 'skin/')" id="menu_selected_1.3Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Developers</div>
-<div id="menu_selected_1.3" class="selectedmenuitemgroup" style="display: block;">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menupage">
-<div class="menupagetitle">Issue Tracking</div>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="issue_tracking.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Hadoop Issue Tracking</h1>
-    
-<p>
-    Hadoop tracks both bugs and enhancement requests <a href="http://issues.apache.org/jira/browse/HADOOP">here</a> using Apache JIRA.
-    We welcome input, however, <strong>before filing a request,</strong> please make sure you do the following:
-    </p>
-    
-<ul>
-      
-<li>Search the JIRA database.</li>
-      
-<li>Check the user <a href="mailing_lists.html#Users">mailing list</a>, both by searching the archives and by asking questions.</li>
-    
-</ul>
-  
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

+ 0 - 126
docs/issue_tracking.pdf

@@ -1,126 +0,0 @@
-%PDF-1.3
-%ª«¬­
-4 0 obj
-<< /Type /Info
-/Producer (FOP 0.20.5) >>
-endobj
-5 0 obj
-<< /Length 760 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gat%!_/e6`&A@rkF<iYAO^q*?]d95t-FY=nl(R?-(kEQ`74]C4^TT=L>!l!<iP;PGHLL7dXn7t`pX9\Y#SWFR!cn33"+lg>7E#-QRN0pI6uXP`P\e^Md<@H,Ap]:s0c@UYkfMO^q_0I!\m?,p47jI<cOQNkb"hFh9J.V,,eCqTa5!USJlj,@9U11-?@n[sk!%-\VM\>kFA_kQ;SUi;<jn:c2$r,lP7mNY%0^0.Z(Sr_&iaoh2CK7J1KSM(Os>Qg0$t`R#![^[Ut]0-dCCZd!p[pMWEWEUe55WM++.;IVJDadLTO0)mGmB-QNulpT-1PnH"o5X;8:9uC+<m6<lMMM`dV9,p3NX[nbK-EePT0enKa.l3&A6%-/`.3nt'rGeb\eF2m?EBOBbUuF0.!dfVgZ`C^oV>VY7j]!#NKN/PMb*^!lfQXG%-D1D3g0XcPK]p^QbX`6AdK=XX/JX"qp(<#e[GLfK_/\N97f-&!N)s!K7;1YkY!P3I'OTW#<1*#K)&1E7Gf*ASTnK>D5KZ9u7p08&4\J>/$<4nBie-&j-]oJZ4S`5Moc&IL'W1^n@^jEW6eS?Hc%_'FA(nEkJii]7&M2`BpF:%)>UlqS'sF:_'tFUtAf\WnTpme:oC1hNIM@`_KsHqZ6SW75L#lG<F`\U=,i`(WW?<(f6l'>`)7\P]fd1>>1,>%X%L!T"Zk5%919n2b+<WUO!E.=O.4C9L761,X45L;_Vo_6uK#6,RP*XIYu-H4ERI"2<9H-i~>
-endstream
-endobj
-6 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 5 0 R
-/Annots 7 0 R
->>
-endobj
-7 0 obj
-[
-8 0 R
-9 0 R
-]
-endobj
-8 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 341.616 585.8 362.268 573.8 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://issues.apache.org/jira/browse/HADOOP)
-/S /URI >>
-/H /I
->>
-endobj
-9 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 182.316 542.2 236.664 530.2 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (mailing_lists.html#Users)
-/S /URI >>
-/H /I
->>
-endobj
-10 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-11 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F5
-/BaseFont /Times-Roman
-/Encoding /WinAnsiEncoding >>
-endobj
-12 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F1
-/BaseFont /Helvetica
-/Encoding /WinAnsiEncoding >>
-endobj
-13 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica-Oblique
-/Encoding /WinAnsiEncoding >>
-endobj
-14 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F7
-/BaseFont /Times-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-1 0 obj
-<< /Type /Pages
-/Count 1
-/Kids [6 0 R ] >>
-endobj
-2 0 obj
-<< /Type /Catalog
-/Pages 1 0 R
- >>
-endobj
-3 0 obj
-<< 
-/Font << /F3 10 0 R /F5 11 0 R /F1 12 0 R /F2 13 0 R /F7 14 0 R >> 
-/ProcSet [ /PDF /ImageC /Text ] >> 
-endobj
-xref
-0 15
-0000000000 65535 f 
-0000001994 00000 n 
-0000002052 00000 n 
-0000002102 00000 n 
-0000000015 00000 n 
-0000000071 00000 n 
-0000000922 00000 n 
-0000001042 00000 n 
-0000001073 00000 n 
-0000001265 00000 n 
-0000001438 00000 n 
-0000001551 00000 n 
-0000001661 00000 n 
-0000001769 00000 n 
-0000001885 00000 n 
-trailer
-<<
-/Size 15
-/Root 2 0 R
-/Info 4 0 R
->>
-startxref
-2225
-%%EOF

+ 17 - 112
docs/linkmap.html

@@ -60,12 +60,15 @@
     |start Tabs
     |start Tabs
     +-->
     +-->
 <ul id="tabs">
 <ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 </li>
 <li>
 <li>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
 </li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
 </ul>
 </ul>
 <!--+
 <!--+
     |end Tabs
     |end Tabs
@@ -99,25 +102,10 @@ document.write("Last Published: " + document.lastModified);
     |start Menu
     |start Menu
     +-->
     +-->
 <div id="menu">
 <div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
+<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Documentation</div>
 <div id="menu_1.1" class="menuitemgroup">
 <div id="menu_1.1" class="menuitemgroup">
 <div class="menuitem">
 <div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
+<a href="index.html">Overview</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
 <a href="quickstart.html">Quickstart</a>
 <a href="quickstart.html">Quickstart</a>
@@ -132,6 +120,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 <a href="api/index.html">API Docs</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
@@ -141,25 +132,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
 </div>
 </div>
 </div>
 </div>
 <div id="credit"></div>
 <div id="credit"></div>
@@ -192,40 +165,6 @@ document.write("Last Published: " + document.lastModified);
 <ul>
 <ul>
 
 
   
   
-<ul>
-<li>
-<a>Project</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>project</em>
-</li>
-<ul>
-    
-<ul>
-<li>
-<a href="releases.html">Releases</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>releases</em>
-</li>
-</ul>
-    
-<ul>
-<li>
-<a href="releases.html#News">News</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>news</em>
-</li>
-</ul>
-    
-<ul>
-<li>
-<a href="credits.html">Credits</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>credits</em>
-</li>
-</ul> 
-    
-<ul>
-<li>
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>store</em>
-</li>
-</ul>    
-  
-</ul>
-</ul>
-
-  
 <ul>
 <ul>
 <li>
 <li>
 <a>Documentation</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>docs</em>
 <a>Documentation</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>docs</em>
@@ -234,7 +173,7 @@ document.write("Last Published: " + document.lastModified);
     
     
 <ul>
 <ul>
 <li>
 <li>
-<a href="documentation.html">Overview</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>overview</em>
+<a href="index.html">Overview</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>overview</em>
 </li>
 </li>
 </ul>
 </ul>
     
     
@@ -264,65 +203,31 @@ document.write("Last Published: " + document.lastModified);
     
     
 <ul>
 <ul>
 <li>
 <li>
-<a href="api/index.html">API Docs</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>api</em>
-</li>
-</ul>
-    
-<ul>
-<li>
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>wiki</em>
-</li>
-</ul>
-    
-<ul>
-<li>
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>faq</em>
-</li>
-</ul>
-    
-<ul>
-<li>
-<a href="mailing_lists.html#Users">Mailing Lists</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>usermail</em>
-</li>
-</ul>
-  
-</ul>
-</ul>
-
-  
-<ul>
-<li>
-<a>Developers</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>resources</em>
-</li>
-<ul>
-    
-<ul>
-<li>
-<a href="mailing_lists.html#Developers">Mailing Lists</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>devmail</em>
+<a href="streaming.html">Streaming</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>streaming</em>
 </li>
 </li>
 </ul>
 </ul>
     
     
 <ul>
 <ul>
 <li>
 <li>
-<a href="issue_tracking.html">Issue Tracking</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>issues</em>
+<a href="api/index.html">API Docs</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>api</em>
 </li>
 </li>
 </ul>
 </ul>
     
     
 <ul>
 <ul>
 <li>
 <li>
-<a href="version_control.html">Version Control</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>vcs</em>
+<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>wiki</em>
 </li>
 </li>
 </ul>
 </ul>
     
     
 <ul>
 <ul>
 <li>
 <li>
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>nightly</em>
+<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>faq</em>
 </li>
 </li>
 </ul>
 </ul>
     
     
 <ul>
 <ul>
 <li>
 <li>
-<a href="irc.html">IRC Channel</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>irc</em>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>lists</em>
 </li>
 </li>
 </ul>
 </ul>
   
   

+ 12 - 12
docs/linkmap.pdf

@@ -5,10 +5,10 @@
 /Producer (FOP 0.20.5) >>
 /Producer (FOP 0.20.5) >>
 endobj
 endobj
 5 0 obj
 5 0 obj
-<< /Length 1121 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 807 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
  >>
 stream
 stream
-Gau1.9lK&M&;KZQ'foq;@JMM>R#_H@@X#FUN:tUM$<Em22R;OCP(W`-,pao_ZX=M%Vuo"'"jd'b3&;PGY?*qoOUP.gZh7o6h7m\.ZhkK[T`%5\l/jsZJFFaen$5/QmSki(A>_C?m$j27WdWNVrB.>#IlH52]pYKGFPG!Imlk93B'Wg*1<U6#PQNq3kC>o\afW%PF!d9$F5fEBq=.!0%493]\_kPT;!HfMOhE^&.=WVsg/I9"WNJ0IeIP-$(NpL$B-esa,PsEYfmbOg60B$'+e[/^r?uE_h-(X8L&_#>o4!g]?kDK,PU-f6:2>eU@.#-c!Z2-oV[-"Ts(_0jZKbA^#UC1CLD'JsRrL(K-5a2K&[#?k8C>O3)3!^$X,9`k]GOouK,>:7Z7kbtSVdtD0FdEYUVR*X>*(a]?4gH[=\fX#kUHkJ/HC>iok\]+Mpj[)?ZDVIhrcQ&l\E_3)3L&$(B_(Jf4I&<.:pob^XUf<ZUIOZ9"4'Q,@b$a6FEuQi&_/\62U<tbsBILpNVW>*bsmTUJ#-Z8)sV]:t7qb$^4mR(n3YMX#t]?([+DPa)dpl\m?:.mUSO-f3Dd#Y4GtmfM*-J+F+PnN/s<,G;J3GWOB6oO%3#%BNYC3MbnP#DjH$;cn_%3T>gg%<n9Y"SYPMeVO\>o.Og(3P_QdCLq!%hR?bR$_OQ%fSfoc-oLk0=4@Ws+-MD2B%`)i,r2q)?qOmps>CRR9/b%[27FU>_m,i^UJan_E>c,V_1`nmheYr1jhS2jdHcLDtgP49K=J.Wqdd4'Zid$)liJa?EnfSmbQR89JG2k2N!u3%B4dbLDI0h5WRK3AO'7Ii'*gV15@3QW7K^5c[r,Q77oe9f%EcTptKo4#E/3=N1/=IZqffbpQJg72*hADegon_q93"7_UDs.dp?!f."^8UpGhfq<8q4O0;h36LZGS)F6cOrm</<=Dfp$IIra/s#ZRI3RO^/e103r\(#p$-P0hi=$\"ET[2N/kI\@$V[uD,rTrF*B:Wp,tClM[Q%Rk$<n-lVBL`8ci*YM-T9CGN4'*?[0=oL?9)&0_b"W>ORl*[MP2n^=/SN8JlC.8e5c%2PIq`=jtTI;7lG>dakaWXMMRAIf]@X^b,~>
+GatUr9i'Ou&;KZL'gBJ8f:Vju0$k?8W*7hE4d;se'/+-`NL.Qap"q]g!BZqc.I9[=*a>',)/$s\dldK6VLU@V5e`5>r@9&pDBfQds2?+eF"p%D`[T"'i7H@G)0lWc8<WZs8!g6igP^]VaMm$d=P$bA51?p%[:t]<DifJ9PrZXu.?EP7P/%nh-^X:$CL(nkEYOOf+$L8r_b%GEfh$)rH@Mlh$"d?U>,!VO,nihWVWeH:2%mX-isBn#_(:"D^EJ9F'.cUr>Bl)LH]IedPZlgD3#[JM]`#K&N8HT?$&2SP`\(PUdh!e5&.CpX7bB(*]AV"XdI$i*-B-CG;DcOp6OST??+cL4349KjoY2&Qh]HD#T]G-B.S_/+nQ8/n31V9]"JuiC=.Li/=sFrlS;k-$$WZTs[ki+WV7qgnd@F9CCLlH=Hm;@p3:1COG'<ULMQQJ0#8o=iWNREtcIX"2\8$n,T?0G8/dBQU(pe=c\gt8]8]Va@XH.;ZE,mPRXpC&N]JI,rU0:13O<nIJh>WXSA3%tN.scei@i?fiWkSF&<`2Q0,-CT#a+iTqP$.bYi]$@S3#\)<P+cnc@cF%ml"HMZQHW-IV\TA3o\03&i,4Q&:+*)P]t4%_:?['UmcS:dp.c'qb%9,Pqn#h0=oJJd^>J.\J#$1,O2F>WoB:X:;hd@MUBDn`YX'-m_EfE_2S+c@D0nYHp#l#q,miTtGD#9#)fq*<a:fqBV@W\FFh9HDDn.#2H]'#2"RXJeBQdD];2"X4Dp>%jJsj%7l>jC*@IRB!H"53="'`54FX0WD6iR[Sm7:F~>
 endstream
 endstream
 endobj
 endobj
 6 0 obj
 6 0 obj
@@ -72,17 +72,17 @@ endobj
 xref
 xref
 0 12
 0 12
 0000000000 65535 f 
 0000000000 65535 f 
-0000001945 00000 n 
-0000002003 00000 n 
-0000002053 00000 n 
+0000001630 00000 n 
+0000001688 00000 n 
+0000001738 00000 n 
 0000000015 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
 0000000071 00000 n 
-0000001284 00000 n 
-0000001390 00000 n 
-0000001502 00000 n 
-0000001611 00000 n 
-0000001721 00000 n 
-0000001829 00000 n 
+0000000969 00000 n 
+0000001075 00000 n 
+0000001187 00000 n 
+0000001296 00000 n 
+0000001406 00000 n 
+0000001514 00000 n 
 trailer
 trailer
 <<
 <<
 /Size 12
 /Size 12
@@ -90,5 +90,5 @@ trailer
 /Info 4 0 R
 /Info 4 0 R
 >>
 >>
 startxref
 startxref
-2173
+1858
 %%EOF
 %%EOF

BIN
docs/logos/elephant_rgb.jpg


BIN
docs/logos/elephant_rgb.pdf


BIN
docs/logos/elephant_rgb.png


BIN
docs/logos/hadoop+elephant_rgb.jpg


BIN
docs/logos/hadoop+elephant_rgb.pdf


BIN
docs/logos/hadoop+elephant_rgb.png


BIN
docs/logos/hadoop_rgb.jpg


BIN
docs/logos/hadoop_rgb.pdf


BIN
docs/logos/hadoop_rgb.png


+ 0 - 318
docs/mailing_lists.html

@@ -1,318 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Hadoop Mailing Lists</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="mailing_lists.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Hadoop Mailing Lists</h1>
-<div id="minitoc-area">
-<ul class="minitoc">
-<li>
-<a href="#Users">Users</a>
-</li>
-<li>
-<a href="#Developers">Developers</a>
-</li>
-<li>
-<a href="#Commits">Commits</a>
-</li>
-</ul>
-</div>
-  
-    
-<a name="N1000C"></a><a name="Users"></a>
-<h2 class="h3">Users</h2>
-<div class="section">
-<p>If you use Hadoop, please subscribe to the Hadoop user mailing list.</p>
-<p>
-        The Hadoop user mailing list is :
-        <a href="mailto:hadoop-user@lucene.apache.org">hadoop-user@lucene.apache.org</a>.
-      </p>
-<ul>
-        
-<li>
-<a href="mailto:hadoop-user-subscribe@lucene.apache.org">Subscribe to List</a>
-</li>
-        
-<li>
-<a href="mailto:hadoop-user-unsubscribe@lucene.apache.org">Unsubscribe from List</a>
-</li>
-        
-<li>Search List Archive on <a href="http://www.mail-archive.com/hadoop-user%40lucene.apache.org/">The Mail Archive</a>,
-        <a href="http://www.nabble.com/Hadoop-Users-f17067.html">Nabble</a>,
-        or <a href="http://dir.gmane.org/gmane.comp.jakarta.lucene.hadoop.user">Gmane</a>
-</li>
-        
-<li>
-<a href="http://mail-archives.apache.org/mod_mbox/lucene-hadoop-user/">View List Archive</a> (<a href="http://lucene.apache.org/mail/hadoop-user/">Raw files</a>)</li>
-      
-</ul>
-<div class="note">
-<div class="label">Note</div>
-<div class="content">In order to post to the list, it is necessary to first subscribe to it.</div>
-</div>
-</div>
-  
-    
-<a name="N10045"></a><a name="Developers"></a>
-<h2 class="h3">Developers</h2>
-<div class="section">
-<p>If you'd like to contribute to Hadoop, please subscribe to the
-      Hadoop developer mailing list.</p>
-<p>
-        The Hadoop developer mailing list is :
-        <a href="mailto:hadoop-dev@lucene.apache.org">hadoop-dev@lucene.apache.org</a>.
-      </p>
-<ul>
-        
-<li>
-<a href="mailto:hadoop-dev-subscribe@lucene.apache.org">Subscribe to List</a>
-</li>
-        
-<li>
-<a href="mailto:hadoop-dev-unsubscribe@lucene.apache.org">Unsubscribe from List</a>
-</li>
-        
-<li>Search List Archive on <a href="http://www.mail-archive.com/hadoop-dev%40lucene.apache.org/">The Mail Archive</a>,
-        <a href="http://www.nabble.com/Hadoop-Dev-f17068.html">Nabble</a>,
-        or <a href="http://dir.gmane.org/gmane.comp.jakarta.lucene.hadoop.devel">Gmane</a>
-</li>
-        
-<li>
-<a href="http://mail-archives.apache.org/mod_mbox/lucene-hadoop-dev/">View List Archive</a> (<a href="http://lucene.apache.org/mail/hadoop-dev/">Raw files</a>)</li>
-      
-</ul>
-<div class="note">
-<div class="label">Note</div>
-<div class="content">In order to post to the list, it is necessary to first subscribe to it.</div>
-</div>
-</div>
-  
-    
-<a name="N1007E"></a><a name="Commits"></a>
-<h2 class="h3">Commits</h2>
-<div class="section">
-<p>If you'd like to see changes made in Hadoop's <a href="version_control.html">version control system</a>
-      then subscribe to the Hadoop commit mailing list.</p>
-<ul>
-        
-<li>
-<a href="mailto:hadoop-commits-subscribe@lucene.apache.org">Subscribe to List</a>
-</li>
-        
-<li>
-<a href="mailto:hadoop-commits-unsubscribe@lucene.apache.org">Unsubscribe from List</a>
-</li>
-        
-<li>Search List Archive on <a href="http://www.mail-archive.com/hadoop-commits%40lucene.apache.org/">The Mail Archive</a>
-</li>        
-        
-<li>
-<a href="http://mail-archives.apache.org/mod_mbox/lucene-hadoop-commits/">View List Archive</a> (<a href="http://lucene.apache.org/mail/hadoop-commits/">Raw files</a>)</li>
-      
-</ul>
-</div>
-  
-  
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 0 - 63
docs/mailing_lists.pdf


+ 12 - 39
docs/mapred_tutorial.html

@@ -60,12 +60,15 @@
     |start Tabs
     |start Tabs
     +-->
     +-->
 <ul id="tabs">
 <ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 </li>
 <li>
 <li>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
 </li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
 </ul>
 </ul>
 <!--+
 <!--+
     |end Tabs
     |end Tabs
@@ -99,25 +102,10 @@ document.write("Last Published: " + document.lastModified);
     |start Menu
     |start Menu
     +-->
     +-->
 <div id="menu">
 <div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
-<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
 <div class="menuitem">
 <div class="menuitem">
-<a href="documentation.html">Overview</a>
+<a href="index.html">Overview</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
 <a href="quickstart.html">Quickstart</a>
 <a href="quickstart.html">Quickstart</a>
@@ -132,6 +120,9 @@ document.write("Last Published: " + document.lastModified);
 <div class="menupagetitle">Map-Reduce Tutorial</div>
 <div class="menupagetitle">Map-Reduce Tutorial</div>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 <a href="api/index.html">API Docs</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
@@ -141,25 +132,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
 </div>
 </div>
 </div>
 </div>
 <div id="credit"></div>
 <div id="credit"></div>

+ 13 - 40
docs/quickstart.html

@@ -60,12 +60,15 @@
     |start Tabs
     |start Tabs
     +-->
     +-->
 <ul id="tabs">
 <ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
 </li>
 </li>
 <li>
 <li>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 <a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
 </li>
 </li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
 </ul>
 </ul>
 <!--+
 <!--+
     |end Tabs
     |end Tabs
@@ -99,25 +102,10 @@ document.write("Last Published: " + document.lastModified);
     |start Menu
     |start Menu
     +-->
     +-->
 <div id="menu">
 <div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.2', 'skin/')" id="menu_selected_1.2Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
-<div id="menu_selected_1.2" class="selectedmenuitemgroup" style="display: block;">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
 <div class="menuitem">
 <div class="menuitem">
-<a href="documentation.html">Overview</a>
+<a href="index.html">Overview</a>
 </div>
 </div>
 <div class="menupage">
 <div class="menupage">
 <div class="menupagetitle">Quickstart</div>
 <div class="menupagetitle">Quickstart</div>
@@ -132,6 +120,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 <a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
 <a href="api/index.html">API Docs</a>
 <a href="api/index.html">API Docs</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
@@ -141,25 +132,7 @@ document.write("Last Published: " + document.lastModified);
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
 </div>
 </div>
 <div class="menuitem">
 <div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
 </div>
 </div>
 </div>
 </div>
 <div id="credit"></div>
 <div id="credit"></div>
@@ -319,7 +292,7 @@ document.write("Last Published: " + document.lastModified);
 <div class="section">
 <div class="section">
 <p>
 <p>
         First, you need to get a Hadoop distribution: download a recent 
         First, you need to get a Hadoop distribution: download a recent 
-        <a href="releases.html">stable release</a> and unpack it.
+        <a href="http://lucene.apache.org/hadoop/releases.html">stable release</a> and unpack it.
       </p>
       </p>
 <p>
 <p>
         Once done, in the distribution edit the file 
         Once done, in the distribution edit the file 

+ 50 - 50
docs/quickstart.pdf

@@ -219,7 +219,7 @@ endobj
 /Rect [ 396.636 145.467 460.944 133.467 ]
 /Rect [ 396.636 145.467 460.944 133.467 ]
 /C [ 0 0 0 ]
 /C [ 0 0 0 ]
 /Border [ 0 0 0 ]
 /Border [ 0 0 0 ]
-/A << /URI (releases.html)
+/A << /URI (http://lucene.apache.org/hadoop/releases.html)
 /S /URI >>
 /S /URI >>
 /H /I
 /H /I
 >>
 >>
@@ -572,37 +572,37 @@ endobj
 xref
 xref
 0 72
 0 72
 0000000000 65535 f 
 0000000000 65535 f 
-0000015453 00000 n 
-0000015539 00000 n 
-0000015631 00000 n 
+0000015485 00000 n 
+0000015571 00000 n 
+0000015663 00000 n 
 0000000015 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
 0000000071 00000 n 
 0000000950 00000 n 
 0000000950 00000 n 
 0000001070 00000 n 
 0000001070 00000 n 
 0000001172 00000 n 
 0000001172 00000 n 
-0000015776 00000 n 
+0000015808 00000 n 
 0000001307 00000 n 
 0000001307 00000 n 
-0000015839 00000 n 
+0000015871 00000 n 
 0000001444 00000 n 
 0000001444 00000 n 
-0000015905 00000 n 
+0000015937 00000 n 
 0000001579 00000 n 
 0000001579 00000 n 
-0000015971 00000 n 
+0000016003 00000 n 
 0000001716 00000 n 
 0000001716 00000 n 
-0000016037 00000 n 
+0000016069 00000 n 
 0000001853 00000 n 
 0000001853 00000 n 
-0000016103 00000 n 
+0000016135 00000 n 
 0000001990 00000 n 
 0000001990 00000 n 
-0000016169 00000 n 
+0000016201 00000 n 
 0000002127 00000 n 
 0000002127 00000 n 
-0000016233 00000 n 
+0000016265 00000 n 
 0000002264 00000 n 
 0000002264 00000 n 
-0000016299 00000 n 
+0000016331 00000 n 
 0000002401 00000 n 
 0000002401 00000 n 
-0000016365 00000 n 
+0000016397 00000 n 
 0000002538 00000 n 
 0000002538 00000 n 
-0000016429 00000 n 
+0000016461 00000 n 
 0000002674 00000 n 
 0000002674 00000 n 
-0000016495 00000 n 
+0000016527 00000 n 
 0000002811 00000 n 
 0000002811 00000 n 
 0000005170 00000 n 
 0000005170 00000 n 
 0000005293 00000 n 
 0000005293 00000 n 
@@ -610,39 +610,39 @@ xref
 0000005511 00000 n 
 0000005511 00000 n 
 0000005678 00000 n 
 0000005678 00000 n 
 0000005852 00000 n 
 0000005852 00000 n 
-0000006019 00000 n 
-0000007930 00000 n 
-0000008038 00000 n 
-0000009934 00000 n 
-0000010057 00000 n 
-0000010091 00000 n 
-0000010267 00000 n 
-0000010443 00000 n 
-0000011467 00000 n 
-0000011590 00000 n 
-0000011617 00000 n 
-0000016559 00000 n 
-0000011789 00000 n 
-0000011922 00000 n 
-0000012151 00000 n 
-0000012366 00000 n 
-0000016610 00000 n 
-0000012623 00000 n 
-0000012928 00000 n 
-0000013143 00000 n 
-0000013297 00000 n 
-0000013522 00000 n 
-0000013834 00000 n 
-0000014014 00000 n 
-0000014272 00000 n 
-0000014428 00000 n 
-0000014680 00000 n 
-0000014793 00000 n 
-0000014903 00000 n 
-0000015014 00000 n 
-0000015122 00000 n 
-0000015228 00000 n 
-0000015344 00000 n 
+0000006051 00000 n 
+0000007962 00000 n 
+0000008070 00000 n 
+0000009966 00000 n 
+0000010089 00000 n 
+0000010123 00000 n 
+0000010299 00000 n 
+0000010475 00000 n 
+0000011499 00000 n 
+0000011622 00000 n 
+0000011649 00000 n 
+0000016591 00000 n 
+0000011821 00000 n 
+0000011954 00000 n 
+0000012183 00000 n 
+0000012398 00000 n 
+0000016642 00000 n 
+0000012655 00000 n 
+0000012960 00000 n 
+0000013175 00000 n 
+0000013329 00000 n 
+0000013554 00000 n 
+0000013866 00000 n 
+0000014046 00000 n 
+0000014304 00000 n 
+0000014460 00000 n 
+0000014712 00000 n 
+0000014825 00000 n 
+0000014935 00000 n 
+0000015046 00000 n 
+0000015154 00000 n 
+0000015260 00000 n 
+0000015376 00000 n 
 trailer
 trailer
 <<
 <<
 /Size 72
 /Size 72
@@ -650,5 +650,5 @@ trailer
 /Info 4 0 R
 /Info 4 0 R
 >>
 >>
 startxref
 startxref
-16676
+16708
 %%EOF
 %%EOF

+ 0 - 322
docs/releases.html

@@ -1,322 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Releases</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Project</div>
-<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
-<div class="menupage">
-<div class="menupagetitle">Releases</div>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Developers</div>
-<div id="menu_1.3" class="menuitemgroup">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menuitem">
-<a href="version_control.html">Version Control</a>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="releases.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Releases</h1>
-<div id="minitoc-area">
-<ul class="minitoc">
-<li>
-<a href="#Download">Download</a>
-</li>
-<li>
-<a href="#Release+Notes">Release Notes</a>
-</li>
-<li>
-<a href="#News">News</a>
-<ul class="minitoc">
-<li>
-<a href="#27+November%2C+2007%3A+release+0.15.1+available">27 November, 2007: release 0.15.1 available </a>
-</li>
-<li>
-<a href="#26+November%2C+2007%3A+release+0.14.4+available">26 November, 2007: release 0.14.4 available </a>
-</li>
-<li>
-<a href="#29+October+2007%3A+release+0.15.0+available">29 October 2007: release 0.15.0 available </a>
-</li>
-<li>
-<a href="#19+October%2C+2007%3A+release+0.14.3+available">19 October, 2007: release 0.14.3 available </a>
-</li>
-<li>
-<a href="#4+September%2C+2007%3A+release+0.14.1+available"> 4 September, 2007: release 0.14.1 available </a>
-</li>
-</ul>
-</li>
-</ul>
-</div> 
-
-    
-<a name="N1000C"></a><a name="Download"></a>
-<h2 class="h3">Download</h2>
-<div class="section">
-<p>Releases may be downloaded from Apache mirrors.</p>
-<p>
-      
-<a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
-      <strong><strong>Download a release now!</strong></strong></a>
-      
-</p>
-<p>On the mirror, all recent releases are available, but are not
-      guaranteed to be stable. For stable releases, look in the stable
-      directory.
-      </p>
-</div>
-
-    
-<a name="N10023"></a><a name="Release+Notes"></a>
-<h2 class="h3">Release Notes</h2>
-<div class="section">
-<p>Release notes for Hadoop releases are available in Jira.</p>
-<p>
-      
-<a href="http://issues.apache.org/jira/browse/HADOOP?report=com.atlassian.jira.plugin.system.project:changelog-panel">
-      <strong><strong>Browse release notes now!</strong></strong></a>
-      
-</p>
-</div>
-
-    
-<a name="N10037"></a><a name="News"></a>
-<h2 class="h3">News</h2>
-<div class="section">
-<a name="N1003D"></a><a name="27+November%2C+2007%3A+release+0.15.1+available"></a>
-<h3 class="h4">27 November, 2007: release 0.15.1 available </h3>
-<p>This release fixes critical bugs in release 0.15.0.</p>
-<a name="N10047"></a><a name="26+November%2C+2007%3A+release+0.14.4+available"></a>
-<h3 class="h4">26 November, 2007: release 0.14.4 available </h3>
-<p>This release fixes critical bugs in release 0.14.3.</p>
-<a name="N10051"></a><a name="29+October+2007%3A+release+0.15.0+available"></a>
-<h3 class="h4">29 October 2007: release 0.15.0 available </h3>
-<p>This release contains many improvements, new features, bug
-      fixes and optimizations.  See the release notes (above) for
-      details.</p>
-<a name="N1005B"></a><a name="19+October%2C+2007%3A+release+0.14.3+available"></a>
-<h3 class="h4">19 October, 2007: release 0.14.3 available </h3>
-<p>This release fixes critical bugs in release 0.14.2.</p>
-<a name="N10065"></a><a name="4+September%2C+2007%3A+release+0.14.1+available"></a>
-<h3 class="h4"> 4 September, 2007: release 0.14.1 available </h3>
-<p>New features in release 0.14 include:</p>
-<ul>
-	  
-<li>Better checksums in HDFS.  Checksums are no longer
-	  stored in parallel HDFS files, but are stored directly by
-	  datanodes alongside blocks.  This is more efficient for the
-	  namenode and also improves data integrity.</li>
-
-	  
-<li>Pipes: A C++ API for MapReduce</li>
-
-	  
-<li>Eclipse Plugin, including HDFS browsing, job
-	  monitoring, etc.</li>
-
-	  
-<li>File modification times in HDFS.</li>
-	
-</ul>
-<p>There are many other improvements, bug fixes, optimizations
-	and new features.  Performance and reliability are better than
-	ever.</p>
-<div class="note">
-<div class="label">Note</div>
-<div class="content">When upgrading an existing HDFS filesystem to a 0.14.x
-	release from a 0.13.x or earlier release, you should first
-	start HDFS with 'bin/start-dfs.sh -upgrade'.  See the <a href="http://wiki.apache.org/lucene-hadoop/Hadoop_0.14_Upgrade">Hadoop
-	0.14 Upgrade</a> page for details.</div>
-</div>
-</div>
-
-  
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

+ 0 - 416
docs/releases.pdf

@@ -1,416 +0,0 @@
-%PDF-1.3
-%ª«¬­
-4 0 obj
-<< /Type /Info
-/Producer (FOP 0.20.5) >>
-endobj
-5 0 obj
-<< /Length 627 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gb!$E9lnc;&;KZQ'fmZtYXkfJRi;NWi(sXC>N+Ke8t-^M$B@]4Iue-PQ<\%]VO^CW%`p3h8q%kaA6n9<]1Ymq!%%FjU&Jhi:i_mV6J[EZbkEPPJZG$r&*.?rh3=ehRdHXC,l&7CY6jOd7nHTO?sIhGH#[&W3I-P2s#%-`n9\TE``,YL:D-.;j"ZF?5fl:TDEJ]8nVOhOO4Blh^++!)S<J+c3+$a8).UI1;e1X3VE"nJD;_ec8DUIJ!.ADt^M;q!i#X0ZSbHc"B+elrL/2$7e5p73<hn^;Ct$Zek@2SYl8h>1+FQ'V#>6'taH7Nd]\kmCE,4_tTaN="mmC2AXUtmM,7ALr/(Wg,n%P@tdiApG((?ac)IOQ?!Zd4jO6X,,"Gm'bL8)SZ`o5jQk[6f)bedk>>r:-;33k1#.8da:LVVl]j*mu)GXQlO099UYK:a('d6.>rI,H0GGsqM-d'(]kH-p4al,hmui8`!:d.j+Y^Y122Hg>'=K6G8g)(D4([3;U3>B8]+3]M)ROJ3LG]'1mt+]FP7%SVrEUYYjIf6?tKT36NVZQCSuLcgn5jnC))+#_(^Nj%iMfb\e\F(p,*bG&.o$DOo5$pcuL2N#c1e2t!i@/VaK<rnE~>
-endstream
-endobj
-6 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 5 0 R
-/Annots 7 0 R
->>
-endobj
-7 0 obj
-[
-8 0 R
-10 0 R
-12 0 R
-14 0 R
-16 0 R
-18 0 R
-20 0 R
-22 0 R
-]
-endobj
-8 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 559.666 159.992 547.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 9 0 R
-/H /I
->>
-endobj
-10 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 541.466 178.316 529.466 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 11 0 R
-/H /I
->>
-endobj
-12 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 523.266 137.324 511.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 13 0 R
-/H /I
->>
-endobj
-14 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 505.066 338.12 493.066 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 15 0 R
-/H /I
->>
-endobj
-16 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 486.866 338.12 474.866 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 17 0 R
-/H /I
->>
-endobj
-18 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 468.666 323.12 456.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 19 0 R
-/H /I
->>
-endobj
-20 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 450.466 326.12 438.466 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 21 0 R
-/H /I
->>
-endobj
-22 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 108.0 432.266 332.792 420.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 23 0 R
-/H /I
->>
-endobj
-24 0 obj
-<< /Length 1682 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gau`T968iW%)2%/YW$pWCq23^Rp2E48Ma;c[<8Ns=G\Pr'8iiRPZ@J0(OUk2W(20TGfIX,EIeX&hugcaq<XGPhk>Y3c?qKjSb7q&0)SR:\Zk6':+5k,q8W7%MNVc</X+0QeOR5I@]RC^k:^KIAbPG;p,AKm:4J^4-b1@Nk;f8as/kaP^e'NlR/JbO.?mG[Z:\h<+eqI#Q+dJ:)kECue%3r1h-sg(kHK\N_JX:Wg.f&&^X^M?FCGo[WD,1#%B+5*&"fB7#,r;VI:iZ]!N^1cCrD?,!pMdnkHQb^Dp>88UhaUNRHkJd>B\)78GPMVTjJkk58G@H![KoYTL%9O`*i6Pb.c(dM7<t*s*YFi.)MB3NNta(;5R?t9O[<]#$$O$<AbE9It-K4l4Zs>Vm4eX1"O;2J#\(Z6l9L-oLmm!RBK0u]D4FJJ*o4:*-[=j/5;;foe<:cMM141*Q<=:)^K@,b9u.A/G`qke#?)_K2XJ5LXS-($QY$uDJ_H4[<>C(+eBrFs2$RF1q**^[u.%Ff#VdO,6BO[2LQn=V0%sSf?YaO/unt+K`@a&^1QENk(T!HaAF^R.*``OKQJ$nOJAVAaG,[E.[*J997PTjT64i7p@9msm*.1\WfIdj:p_bmOCucb9!NR&];SXB3Rl]0PM5tWSe'<O!:Cm50G6*"!$XOdE1i[l&T+\[7hOSk`&*Q]BN)sZNCBe>#UP>uO4`4hDN?!WaY7g`1t\."kR*@9)N+k*+rCuD6C\(c4kbOHaRi"R,<SR\0;oA#Pe2-O5Fnc?El(i'A1D5N0MhD*'m0!KaXT7rS2TYOn&0Ut'/*t(0UTVClh^'>WS24!KM"QA-L6m.%#bb'0`kN0Fq3=6<tQ`XA]"@*b)V!U7K6aR=f,%;/N#"GMqpEUbU/)SFF.PGaMBuhs3bgJN9Q^Ts4>/?LeBkSI;kigQ[5%BJ9]66&KZ[k47-GlO8op"p+T5[VRkro:cuS1(IkWUOae6F!`s4WMXq0%GC:Y<5Kgf7S)cV&n?db$NG,XmMa2l+$?mJHfqTKA3o?dINWec\ncj5g3[MD_UJ:Bac6JfhaNonO5qKX8EEC)C+KVhgACmJ+Ge2<20]_JEU/M=,GTk@-`Wl#WJn3&C!b%tdfXNd6pa*bm,h12V]/9=#ZsDsPYa[&_l@aYF?.G5kM>SlCG\m!?@^h9\#BQTsE8Jn27>X1F_%c1FcYJ-H4-V01\.$-`N-Ct@f6WNRW!AiWf'8kEP6XR[[^0q*qLiYbhj?OHUpbY/bAZ'O1BI@Da4j++7j#`h%n7p/[NoY;Rt#73V^SWY$U[r7S%,X=7qt./TR9!B[AhL^n]''"ZD$u,3f<H_(\A^4>UMGrnQK?s(55<A,.?D=jIUh7iTH3>@*E@#/Yh/*gLI;@>aK,sB,_=@Z8Pef&AsLDR!NnFU!'Bk53n\o0-`DkaF?#t:;n;S.=#(o_$SY"]T9KJ6[ASN"kT5&B`J(@$0V5h[gsh@/PW:53<9ghUfY.%S61/H)p6\"],R=BeeJut.&h[E9"K\@^$r%1!]j3ZJpG-P-rGP&bJ2Zpo%&2UeU3DY@j-BPXZQ^YT)$a>G$R4q0RH(R*C*f7]'t*OgXe8c5F:p!#A&HrP'3mX%(k'&$C>MTI1`R3O*6fWlVsg+DXB%_bE>QGZ0LpQ[4$01BPfMg^6fI+a8Z1$s2sm~>
-endstream
-endobj
-25 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 24 0 R
-/Annots 26 0 R
->>
-endobj
-26 0 obj
-[
-27 0 R
-28 0 R
-]
-endobj
-27 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 90.0 608.466 217.656 596.466 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://www.apache.org/dyn/closer.cgi/lucene/hadoop/)
-/S /URI >>
-/H /I
->>
-endobj
-28 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 90.0 500.532 224.304 488.532 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://issues.apache.org/jira/browse/HADOOP?report=com.atlassian.jira.plugin.system.project:changelog-panel)
-/S /URI >>
-/H /I
->>
-endobj
-29 0 obj
-<< /Length 1091 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gat=*9lnc;&A@7.kXLu6W[1l8_i:k8E+"3Ni;ajf=E_X8<fiTDiRbE@m!,'XSWn:1B4\j6\#\=t]q/$VbpRufN"kc,5FD^4+E8C=G9lmsY8Ij"B>c:*R!b=7IerXp1I`4:>P!4T['0Q0)=^N8;jKg^?BH=25<0P+aY,FL#U"*eo`,aQ*?ktS`4KT[f8Y)OY'Cubl>sKbFT#ZNd.O^"A@aCJV%7KW[tD)mB%t+LK_]I#K2J%?4(H<,.(>uWVX/u8:G(RJqR"bHO@FY51L2KS,1F)/VG<V,=EV2Q%F0*dA5@2=M[,L@609\/;3Vb6<Eehqb;a_\KrW_Tgo3orY!I25Cf$[DIBW-7ZNe/?Djpi<S^uJ%e7sm]W75[cC+.eCATM@4g/:)$>Tdb--T?'^<N4H$0"'H)a`jGK_^W/sMX%2/PjRei:&m\9c%80i`pK&DJipF<of\`<%2MN%3Jgkf%SB!MPIWa7ob>^0"5*MaXP@<^UuA\71"HNB!%^*Z*.0Q+d(Oid(IoJt'(FUla!]osb>AH-1h'.ArlVeZSh?cFH6uo(6#,D;/(dlgE%68[4S%.JMst*+NjctlZup^U85@]tZ2J/V:+5/-0AuSi64LJ.(H*9AgSeKJFlOO*]c?+0felN)W8e?X2>dk2d(UT]P"N"Ck13MpO<I(31gVZ>V8i,X\OD+T1Rl3o<b>9B3!%bnFPEYC*@Eo]Lp!I]9#JB;>O>Gnk1N)Q2m&_</8o3SDI*-SX(Ot\DO&(JToR'bmopi*)Fk>tT`X4SKZERgLm,_K#n8+),?o3S4/)hdh:$@.3'lUiUmWt'F&A!Mm));RLHTbl*P7ETj":g`%oC2ghaZe]*]\sO+<40eHN^9<#a6VQ^OY\PMV*CL)r;1HP8Su-H,&S5F^!^S^l=p7Qg1joc3kWS4ZYSU8/N)]Vr$6257>`G?A1U]mA3L6m^r]VF"pQ+p7V.o6ke66s4hSt.@nVKPH,6lla7^=:0^6i/!:H?-4m3`GV\aQNEP_O/lqB#ah<YL8Op#$i^"[lCVE]!nS4UIZ$gD.\*`%nIV!+LFk.TM."[`G@'m0XA?/2Q4-cKuofqffCC1~>
-endstream
-endobj
-30 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 29 0 R
-/Annots 31 0 R
->>
-endobj
-31 0 obj
-[
-32 0 R
-]
-endobj
-32 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 237.64 564.79 308.512 556.79 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://wiki.apache.org/lucene-hadoop/Hadoop_0.14_Upgrade)
-/S /URI >>
-/H /I
->>
-endobj
-34 0 obj
-<<
- /Title (\376\377\0\61\0\40\0\104\0\157\0\167\0\156\0\154\0\157\0\141\0\144)
- /Parent 33 0 R
- /Next 35 0 R
- /A 9 0 R
->> endobj
-35 0 obj
-<<
- /Title (\376\377\0\62\0\40\0\122\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\116\0\157\0\164\0\145\0\163)
- /Parent 33 0 R
- /Prev 34 0 R
- /Next 36 0 R
- /A 11 0 R
->> endobj
-36 0 obj
-<<
- /Title (\376\377\0\63\0\40\0\116\0\145\0\167\0\163)
- /Parent 33 0 R
- /First 37 0 R
- /Last 41 0 R
- /Prev 35 0 R
- /Count -5
- /A 13 0 R
->> endobj
-37 0 obj
-<<
- /Title (\376\377\0\63\0\56\0\61\0\40\0\62\0\67\0\40\0\116\0\157\0\166\0\145\0\155\0\142\0\145\0\162\0\54\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\162\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\60\0\56\0\61\0\65\0\56\0\61\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145)
- /Parent 36 0 R
- /Next 38 0 R
- /A 15 0 R
->> endobj
-38 0 obj
-<<
- /Title (\376\377\0\63\0\56\0\62\0\40\0\62\0\66\0\40\0\116\0\157\0\166\0\145\0\155\0\142\0\145\0\162\0\54\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\162\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\60\0\56\0\61\0\64\0\56\0\64\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145)
- /Parent 36 0 R
- /Prev 37 0 R
- /Next 39 0 R
- /A 17 0 R
->> endobj
-39 0 obj
-<<
- /Title (\376\377\0\63\0\56\0\63\0\40\0\62\0\71\0\40\0\117\0\143\0\164\0\157\0\142\0\145\0\162\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\162\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\60\0\56\0\61\0\65\0\56\0\60\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145)
- /Parent 36 0 R
- /Prev 38 0 R
- /Next 40 0 R
- /A 19 0 R
->> endobj
-40 0 obj
-<<
- /Title (\376\377\0\63\0\56\0\64\0\40\0\61\0\71\0\40\0\117\0\143\0\164\0\157\0\142\0\145\0\162\0\54\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\162\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\60\0\56\0\61\0\64\0\56\0\63\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145)
- /Parent 36 0 R
- /Prev 39 0 R
- /Next 41 0 R
- /A 21 0 R
->> endobj
-41 0 obj
-<<
- /Title (\376\377\0\63\0\56\0\65\0\40\0\64\0\40\0\123\0\145\0\160\0\164\0\145\0\155\0\142\0\145\0\162\0\54\0\40\0\62\0\60\0\60\0\67\0\72\0\40\0\162\0\145\0\154\0\145\0\141\0\163\0\145\0\40\0\60\0\56\0\61\0\64\0\56\0\61\0\40\0\141\0\166\0\141\0\151\0\154\0\141\0\142\0\154\0\145)
- /Parent 36 0 R
- /Prev 40 0 R
- /A 23 0 R
->> endobj
-42 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-43 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F5
-/BaseFont /Times-Roman
-/Encoding /WinAnsiEncoding >>
-endobj
-44 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F1
-/BaseFont /Helvetica
-/Encoding /WinAnsiEncoding >>
-endobj
-45 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica-Oblique
-/Encoding /WinAnsiEncoding >>
-endobj
-46 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F7
-/BaseFont /Times-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-1 0 obj
-<< /Type /Pages
-/Count 3
-/Kids [6 0 R 25 0 R 30 0 R ] >>
-endobj
-2 0 obj
-<< /Type /Catalog
-/Pages 1 0 R
- /Outlines 33 0 R
- /PageMode /UseOutlines
- >>
-endobj
-3 0 obj
-<< 
-/Font << /F3 42 0 R /F5 43 0 R /F1 44 0 R /F2 45 0 R /F7 46 0 R >> 
-/ProcSet [ /PDF /ImageC /Text ] >> 
-endobj
-9 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 659.0 null]
->>
-endobj
-11 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 551.066 null]
->>
-endobj
-13 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 477.532 null]
->>
-endobj
-15 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 446.398 null]
->>
-endobj
-17 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 395.145 null]
->>
-endobj
-19 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 343.892 null]
->>
-endobj
-21 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 279.439 null]
->>
-endobj
-23 0 obj
-<<
-/S /GoTo
-/D [25 0 R /XYZ 85.0 228.186 null]
->>
-endobj
-33 0 obj
-<<
- /First 34 0 R
- /Last 36 0 R
->> endobj
-xref
-0 47
-0000000000 65535 f 
-0000008771 00000 n 
-0000008843 00000 n 
-0000008935 00000 n 
-0000000015 00000 n 
-0000000071 00000 n 
-0000000789 00000 n 
-0000000909 00000 n 
-0000000983 00000 n 
-0000009058 00000 n 
-0000001118 00000 n 
-0000009121 00000 n 
-0000001255 00000 n 
-0000009187 00000 n 
-0000001392 00000 n 
-0000009253 00000 n 
-0000001528 00000 n 
-0000009319 00000 n 
-0000001664 00000 n 
-0000009385 00000 n 
-0000001800 00000 n 
-0000009451 00000 n 
-0000001936 00000 n 
-0000009517 00000 n 
-0000002073 00000 n 
-0000003848 00000 n 
-0000003971 00000 n 
-0000004005 00000 n 
-0000004207 00000 n 
-0000004465 00000 n 
-0000005649 00000 n 
-0000005772 00000 n 
-0000005799 00000 n 
-0000009583 00000 n 
-0000006006 00000 n 
-0000006145 00000 n 
-0000006328 00000 n 
-0000006484 00000 n 
-0000006825 00000 n 
-0000007180 00000 n 
-0000007524 00000 n 
-0000007873 00000 n 
-0000008215 00000 n 
-0000008328 00000 n 
-0000008438 00000 n 
-0000008546 00000 n 
-0000008662 00000 n 
-trailer
-<<
-/Size 47
-/Root 2 0 R
-/Info 4 0 R
->>
-startxref
-9634
-%%EOF

+ 831 - 0
docs/streaming.html

@@ -0,0 +1,831 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<meta name="http-equiv" content="Content-Type">
+<meta name="content" content="text/html;">
+<meta name="charset" content="utf-8">
+<title>Hadoop Streaming</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://lucene.apache.org/hadoop/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.15 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">Quickstart</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">Cluster Setup</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS Architecture</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Streaming</div>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/hadoop/mailing_lists.html">Mailing Lists</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="streaming.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Hadoop Streaming</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Hadoop+Streaming">Hadoop Streaming</a>
+</li>
+<li>
+<a href="#How+Does+Streaming+Work">How Does Streaming Work </a>
+</li>
+<li>
+<a href="#Package+Files+With+Job+Submissions">Package Files With Job Submissions</a>
+</li>
+<li>
+<a href="#Streaming+Options+and+Usage">Streaming Options and Usage </a>
+<ul class="minitoc">
+<li>
+<a href="#Mapper-Only+Jobs">Mapper-Only Jobs </a>
+</li>
+<li>
+<a href="#Specifying+Other+Plugins+for+Jobs">Specifying Other Plugins for Jobs </a>
+</li>
+<li>
+<a href="#Large+files+and+archives+in+Hadoop+Streaming">Large files and archives in Hadoop Streaming </a>
+</li>
+<li>
+<a href="#Specifying+Additional+Configuration+Variables+for+Jobs">Specifying Additional Configuration Variables for Jobs </a>
+</li>
+<li>
+<a href="#Other+Supported+Options">Other Supported Options </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#More+usage+examples">More usage examples </a>
+<ul class="minitoc">
+<li>
+<a href="#Customizing+the+Way+to+Split+Lines+into+Key%2FValue+Pairs">Customizing the Way to Split Lines into Key/Value Pairs </a>
+</li>
+<li>
+<a href="#A+Useful+Partitioner+Class+%28secondary+sort%2C+the+-partitioner+org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner+option%29">A Useful Partitioner Class (secondary sort, the -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner option) </a>
+</li>
+<li>
+<a href="#Working+with+the+Hadoop+Aggregate+Package+%28the+-reduce+aggregate+option%29">Working with the Hadoop Aggregate Package (the -reduce aggregate option) </a>
+</li>
+<li>
+<a href="#Field+Selection+%28+similar+to+unix+%27cut%27+command%29">Field Selection ( similar to unix 'cut' command) </a>
+</li>
+</ul>
+</li>
+<li>
+<a href="#Frequently+Asked+Questions">Frequently Asked Questions </a>
+<ul class="minitoc">
+<li>
+<a href="#How+do+I+use+Hadoop+Streaming+to+run+an+arbitrary+set+of+%28semi-%29independent+tasks%3F">How do I use Hadoop Streaming to run an arbitrary set of (semi-)independent tasks? </a>
+</li>
+<li>
+<a href="#How+do+I+process+files%2C+one+per+map%3F">How do I process files, one per map? </a>
+</li>
+<li>
+<a href="#How+many+reducers+should+I+use%3F">How many reducers should I use? </a>
+</li>
+<li>
+<a href="#If+I+set+up+an+alias+in+my+shell+script%2C+will+that+work+after+-mapper%2C+i.e.+say+I+do%3A+alias+c1%3D%27cut+-f1%27.+Will+-mapper+%22c1%22+work%3F">If I set up an alias in my shell script, will that work after -mapper, i.e. say I do: alias c1='cut -f1'. Will -mapper "c1" work? </a>
+</li>
+<li>
+<a href="#Can+I+use+UNIX+pipes%3F+For+example%2C+will+-mapper+%22cut+-f1+%7C+sed+s%2Ffoo%2Fbar%2Fg%22+work%3F">Can I use UNIX pipes? For example, will -mapper "cut -f1 | sed s/foo/bar/g" work?</a>
+</li>
+<li>
+<a href="#When+I+run+a+streaming+job+by">When I run a streaming job by distributing large executables (for example, 3.6G) through the -file option, I get a "No space left on device" error. What do I do? </a>
+</li>
+<li>
+<a href="#How+do+I+specify+multiple+input+directories%3F">How do I specify multiple input directories? </a>
+</li>
+<li>
+<a href="#How+do+I+generate+output+files+with+gzip+format%3F">How do I generate output files with gzip format? </a>
+</li>
+<li>
+<a href="#How+do+I+provide+my+own+input%2Foutput+format+with+streaming%3F">How do I provide my own input/output format with streaming? </a>
+</li>
+<li>
+<a href="#How+do+I+parse+XML+documents+using+streaming%3F">How do I parse XML documents using streaming? </a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+
+<a name="N10018"></a><a name="Hadoop+Streaming"></a>
+<h2 class="h3">Hadoop Streaming</h2>
+<div class="section">
+<p>
+Hadoop streaming is a utility that comes with the Hadoop distribution. The utility allows you to create and run map/reduce jobs with any executable or script as the mapper and/or the reducer. For example:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper /bin/cat \
+    -reducer /bin/wc
+</pre>
+</div>
+
+
+<a name="N10026"></a><a name="How+Does+Streaming+Work"></a>
+<h2 class="h3">How Does Streaming Work </h2>
+<div class="section">
+<p>
+In the above example, both the mapper and the reducer are executables that read the input from stdin (line by line) and emit the output to stdout. The utility will create a map/reduce job, submit the job to an appropriate cluster, and monitor the progress of the job until it completes.
+</p>
+<p>
+  When an executable is specified for mappers, each mapper task will launch the executable as a separate process when the mapper is initialized. As the mapper task runs, it converts its inputs into lines and feeds the lines to the stdin of the process. In the meantime, the mapper collects the line-oriented outputs from the stdout of the process and converts each line into a key/value pair, which is collected as the output of the mapper. By default, the 
+  <em>prefix of a line up to the first tab character</em> is the <strong>key</strong> and the rest of the line (excluding the tab character) will be the <strong>value</strong>. However, this can be customized, as <a href="#Customizing+the+Way+to+Split+Lines+into+Key%2FValue+Pairs">discussed later</a>.
+</p>
+<p>
+When an executable is specified for reducers, each reducer task will launch the executable as a separate process when the reducer is initialized. As the reducer task runs, it converts its input key/value pairs into lines and feeds the lines to the stdin of the process. In the meantime, the reducer collects the line-oriented outputs from the stdout of the process and converts each line into a key/value pair, which is collected as the output of the reducer. By default, the prefix of a line up to the first tab character is the key and the rest of the line (excluding the tab character) is the value. However, this can be customized, as <a href="#Customizing+the+Way+to+Split+Lines+into+Key%2FValue+Pairs">discussed later</a>.
+</p>
+<p>
+This is the basis for the communication protocol between the map/reduce framework and the streaming mapper/reducer.
+</p>
+<p>
+You can supply a Java class as the mapper and/or the reducer. The above example is equivalent to:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer /bin/wc
+</pre>
+</div>
+
+
+<a name="N1004E"></a><a name="Package+Files+With+Job+Submissions"></a>
+<h2 class="h3">Package Files With Job Submissions</h2>
+<div class="section">
+<p>
+You can specify any executable as the mapper and/or the reducer. The executables do not need to pre-exist on the machines in the cluster; however, if they don't, you will need to use the "-file" option to tell the framework to pack your executable files as a part of job submission. For example:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myPythonScript.py \
+    -reducer /bin/wc \
+    -file myPythonScript.py 
+</pre>
+<p>
+The above example specifies a user-defined Python executable as the mapper. The option "-file myPythonScript.py" causes the Python executable to be shipped to the cluster machines as a part of job submission.
+</p>
+<p>
+In addition to executable files, you can also package other auxiliary files (such as dictionaries, configuration files, etc) that may be used by the mapper and/or the reducer. For example:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myPythonScript.py \
+    -reducer /bin/wc \
+    -file myPythonScript.py \
+    -file myDictionary.txt
+</pre>
+</div>
+
+
+<a name="N10066"></a><a name="Streaming+Options+and+Usage"></a>
+<h2 class="h3">Streaming Options and Usage </h2>
+<div class="section">
+<a name="N1006C"></a><a name="Mapper-Only+Jobs"></a>
+<h3 class="h4">Mapper-Only Jobs </h3>
+<p>
+Often, you may want to process input data using a map function only. To do this, simply set mapred.reduce.tasks to zero. The map/reduce framework will not create any reducer tasks. Rather, the outputs of the mapper tasks will be the final output of the job.
+</p>
+<p>
+To be backward compatible, Hadoop Streaming also supports the "-reducer NONE" option, which is equivalent to "-jobconf mapred.reduce.tasks=0".
+</p>
+<a name="N10078"></a><a name="Specifying+Other+Plugins+for+Jobs"></a>
+<h3 class="h4">Specifying Other Plugins for Jobs </h3>
+<p>
+Just as with a normal map/reduce job, you can specify other plugins for a streaming job:
+</p>
+<pre class="code">
+   -inputformat JavaClassName
+   -outputformat JavaClassName
+   -partitioner JavaClassName
+   -combiner JavaClassName
+</pre>
+<p>
+The class you supply for the input format should return key/value pairs of Text class. If you do not specify an input format class, the TextInputFormat is used as the default. Since the TextInputFormat returns keys of LongWritable class, which are actually not part of the input data, the keys will be discarded; only the values will be piped to the streaming mapper.
+</p>
+<p>
+The class you supply for the output format is expected to take key/value pairs of Text class. If you do not specify an output format class, the TextOutputFormat is used as the default.
+</p>
+<a name="N1008B"></a><a name="Large+files+and+archives+in+Hadoop+Streaming"></a>
+<h3 class="h4">Large files and archives in Hadoop Streaming </h3>
+<p>
+The -cacheFile and -cacheArchive options allow you to make files and archives available to the tasks. The argument is a URI to the file or archive that you have already uploaded to HDFS. These files and archives are cached across jobs. You can retrieve the host and fs_port values from the fs.default.name config variable.
+</p>
+<p>
+Here are examples of the -cacheFile option:
+</p>
+<pre class="code">
+-cacheFile hdfs://host:fs_port/user/testfile.txt#testlink
+</pre>
+<p>
+In the above example, the part of the url after # is used as the symlink name that is created in the current working directory of tasks. So the tasks will have a symlink called testlink in the cwd that points to a local copy of testfile.txt. Multiple entries can be specified as: 
+</p>
+<pre class="code">
+-cacheFile hdfs://host:fs_port/user/testfile1.txt#testlink1 -cacheFile hdfs://host:fs_port/user/testfile2.txt#testlink2
+</pre>
+<p>
+The -cacheArchive option allows you to copy jars locally to the cwd of tasks and automatically unjar the files. For example:
+</p>
+<pre class="code">
+-cacheArchive hdfs://host:fs_port/user/testfile.jar#testlink3
+</pre>
+<p>
+In the example above, a symlink testlink3 is created in the current working directory of tasks. This symlink points to the directory that stores the unjarred contents of the uploaded jar file.
+</p>
+<p>
+Here's another example of the -cacheArchive option. Here, the input.txt file has two lines specifying the names of the two files: testlink/cache.txt and testlink/cache2.txt. "testlink" is a symlink to the archived directory, which has the files "cache.txt" and "cache2.txt".
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+                  -input "/user/me/samples/cachefile/input.txt"  \
+                  -mapper "xargs cat"  \
+                  -reducer "cat"  \
+                  -output "/user/me/samples/cachefile/out" \
+                  -cacheArchive 'hdfs://hadoop-nn1.example.com:8020/user/me/samples/cachefile/cachedir.jar#testlink' \
+                  -jobconf mapred.map.tasks=1 \
+                  -jobconf mapred.reduce.tasks=1 \
+                  -jobconf mapred.job.name="Experiment"
+
+$ ls test_jar/
+cache.txt  cache2.txt
+
+$ jar cvf cachedir.jar -C test_jar/ .
+added manifest
+adding: cache.txt(in = 30) (out= 29)(deflated 3%)
+adding: cache2.txt(in = 37) (out= 35)(deflated 5%)
+
+$ hadoop dfs -put cachedir.jar samples/cachefile
+
+$ hadoop dfs -cat /user/me/samples/cachefile/input.txt
+testlink/cache.txt
+testlink/cache2.txt
+
+$ cat test_jar/cache.txt 
+This is just the cache string
+
+$ cat test_jar/cache2.txt 
+This is just the second cache string
+
+$ hadoop dfs -ls /user/me/samples/cachefile/out      
+Found 1 items
+/user/me/samples/cachefile/out/part-00000  &lt;r 3&gt;   69
+
+$ hadoop dfs -cat /user/me/samples/cachefile/out/part-00000
+This is just the cache string   
+This is just the second cache string
+
+</pre>
+<a name="N100B4"></a><a name="Specifying+Additional+Configuration+Variables+for+Jobs"></a>
+<h3 class="h4">Specifying Additional Configuration Variables for Jobs </h3>
+<p>
+You can specify additional configuration variables by using "-jobconf  &lt;n&gt;=&lt;v&gt;". For example: 
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper\
+    -reducer /bin/wc \
+    -jobconf mapred.reduce.tasks=2
+</pre>
+<p>
+The -jobconf mapred.reduce.tasks=2 in the above example specifies to use two reducers for the job.
+</p>
+<p>
+For more details on the jobconf parameters see: <a href="http://wiki.apache.org/lucene-hadoop/JobConfFile">http://wiki.apache.org/lucene-hadoop/JobConfFile</a>
+</p>
+<a name="N100CB"></a><a name="Other+Supported+Options"></a>
+<h3 class="h4">Other Supported Options </h3>
+<p>
+Other options you may specify for a streaming job are described here:
+</p>
+<table class="ForrestTable" cellspacing="1" cellpadding="4">
+
+<tr>
+<th colspan="1" rowspan="1">Parameter</th><th colspan="1" rowspan="1">Optional/Required </th><th colspan="1" rowspan="1">Description </th>
+</tr>
+
+<tr>
+<td colspan="1" rowspan="1"> -cluster name </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Switch between local Hadoop and one or more remote clusters </td>
+</tr>
+
+
+<tr>
+<td colspan="1" rowspan="1"> -dfs  host:port or local </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Override the DFS configuration for the job </td>
+</tr>
+
+<tr>
+<td colspan="1" rowspan="1"> -jt host:port or local </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Override the JobTracker configuration for the job </td>
+</tr>
+
+<tr>
+<td colspan="1" rowspan="1"> -additionalconfspec specfile </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Specifies a set of configuration variables in an XML file like hadoop-site.xml, instead of using multiple  options of type "-jobconf name=value" </td>
+</tr>
+
+
+<tr>
+<td colspan="1" rowspan="1"> -cmdenv   name=value </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Pass env var to streaming commands </td>
+</tr>
+
+<tr>
+<td colspan="1" rowspan="1"> -cacheFile fileNameURI </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Specify a file to be uploaded to the HDFS </td>
+</tr>
+
+<tr>
+<td colspan="1" rowspan="1"> -cacheArchive fileNameURI </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Specify a jar file to be uploaded to the HDFS. This jar file is unjarred automatically in the cwd of the task </td>
+</tr>
+
+
+<tr>
+<td colspan="1" rowspan="1"> -inputreader JavaClassName </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> For backwards-compatibility: specifies a record reader class (instead of an input format class) </td>
+</tr>
+
+<tr>
+<td colspan="1" rowspan="1"> -verbose </td><td colspan="1" rowspan="1"> Optional </td><td colspan="1" rowspan="1"> Verbose output </td>
+</tr>
+
+</table>
+<p>
+To switch between "local" Hadoop and one or more remote Hadoop clusters use -cluster &lt;name&gt;.
+By default, hadoop-default.xml and hadoop-site.xml are used. The -cluster &lt;name&gt; option will cause $HADOOP_HOME/conf/hadoop-&lt;name&gt;.xml to be used instead.
+</p>
+<p>
+To change the local temp directory use:
+</p>
+<pre class="code">
+  -jobconf dfs.data.dir=/tmp
+</pre>
+<p>
+To specify additional local temp directories use:
+</p>
+<pre class="code">
+   -jobconf mapred.local.dir=/tmp/local
+   -jobconf mapred.system.dir=/tmp/system
+   -jobconf mapred.temp.dir=/tmp/temp
+</pre>
+<p>
+For more details on jobconf parameters see: <a href="http://wiki.apache.org/lucene-hadoop/JobConfFile">http://wiki.apache.org/lucene-hadoop/JobConfFile</a>
+
+</p>
+<p>
+To set an environment variable in a streaming command use:
+</p>
+<pre class="code">
+-cmdenv EXAMPLE_DIR=/home/example/dictionaries/
+</pre>
+</div>
+
+
+<a name="N10183"></a><a name="More+usage+examples"></a>
+<h2 class="h3">More usage examples </h2>
+<div class="section">
+<a name="N10189"></a><a name="Customizing+the+Way+to+Split+Lines+into+Key%2FValue+Pairs"></a>
+<h3 class="h4">Customizing the Way to Split Lines into Key/Value Pairs </h3>
+<p>
+As noted earlier, when the map/reduce framework reads a line from the stdout of the mapper, it splits the line into a key/value pair. By default, the prefix of the line up to the first tab character is the key and the rest of the line (excluding the tab character) is the value.
+</p>
+<p>
+However, you can customize this default. You can specify a field separator other than the tab character (the default), and you can specify the nth (n &gt;= 1) occurrence of the separator in a line, rather than the first occurrence (the default), as the boundary between the key and the value. For example:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
+    -jobconf stream.map.output.field.separator=. \
+    -jobconf stream.num.map.output.key.fields=4 
+</pre>
+<p>
+In the above example, "-jobconf stream.map.output.field.separator=." specifies "." as the field separator for the map outputs, and the prefix up to the fourth "." in a line will be the key and the rest of the line (excluding the fourth ".") will be the value. If a line has fewer than four "."s, then the whole line will be the key and the value will be an empty Text object (like the one created by new Text("")).
+</p>
+<p>
+Similarly, you can use "-jobconf stream.reduce.output.field.separator=SEP" and "-jobconf stream.num.reduce.output.fields=NUM" to specify the nth field separator in a line of the reduce outputs as the separator between the key and the value.
+</p>
+<a name="N1019F"></a><a name="A+Useful+Partitioner+Class+%28secondary+sort%2C+the+-partitioner+org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner+option%29"></a>
+<h3 class="h4">A Useful Partitioner Class (secondary sort, the -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner option) </h3>
+<p>
+Hadoop has a library class, org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner, that is useful for many applications. This class allows the map/reduce framework to partition the map outputs based on prefixes of keys, not the whole keys. For example:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
+    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
+    -jobconf stream.map.output.field.separator=. \
+    -jobconf stream.num.map.output.key.fields=4 \
+    -jobconf map.output.key.field.separator=. \
+    -jobconf num.key.fields.for.partition=2 \
+    -jobconf mapred.reduce.tasks=12
+</pre>
+<p>
+Here, <em>-jobconf stream.map.output.field.separator=.</em> and <em>-jobconf stream.num.map.output.key.fields=4</em> are as explained in the previous example. The two variables are used by streaming to identify the key/value pairs of the mapper.
+</p>
+<p>
+The map output keys of the above map/reduce job normally have four fields separated by ".". However, the map/reduce framework will partition the map outputs by the first two fields of the keys using the <em>-jobconf num.key.fields.for.partition=2</em> option. Here, <em>-jobconf map.output.key.field.separator=.</em> specifies the separator for the partition. This guarantees that all the key/value pairs with the same first two fields in the keys will be partitioned into the same reducer.
+</p>
+<p>
+
+<em>This is effectively equivalent to specifying the first two fields as the primary key and the next two fields as the secondary. The primary key is used for partitioning, and the combination of the primary and secondary keys is used for sorting.</em> A simple illustration is shown here:
+</p>
+<p>
+Output of map (the keys)</p>
+<pre class="code">
+11.12.1.2
+11.14.2.3
+11.11.4.1
+11.12.1.1
+11.14.2.2
+
+</pre>
+<p>
+Partition into 3 reducers (the first 2 fields are used as keys for partition)</p>
+<pre class="code">
+11.11.4.1
+-----------
+11.12.1.2
+11.12.1.1
+-----------
+11.14.2.3
+11.14.2.2
+</pre>
+<p>
+Sorting within each partition for the reducer (all 4 fields used for sorting)</p>
+<pre class="code">
+11.11.4.1
+-----------
+11.12.1.1
+11.12.1.2
+-----------
+11.14.2.2
+11.14.2.3
+</pre>
+<a name="N101D5"></a><a name="Working+with+the+Hadoop+Aggregate+Package+%28the+-reduce+aggregate+option%29"></a>
+<h3 class="h4">Working with the Hadoop Aggregate Package (the -reduce aggregate option) </h3>
+<p>
+Hadoop has a library package called "Aggregate" (<a href="https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate">https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate</a>).  Aggregate provides a special reducer class and a special combiner class, and a list of simple aggregators that perform aggregations such as "sum", "max", "min" and so on  over a sequence of values. Aggregate allows you to define a mapper plugin class that is expected to generate "aggregatable items" for each input key/value pair of the mappers. The combiner/reducer will aggregate those aggregatable items by invoking the appropriate aggregators.
+</p>
+<p>
+To use Aggregate, simply specify "-reducer aggregate":
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myAggregatorForKeyCount.py \
+    -reducer aggregate \
+    -file myAggregatorForKeyCount.py \
+    -jobconf mapred.reduce.tasks=12
+</pre>
+<p>
+The python program myAggregatorForKeyCount.py looks like:
+</p>
+<pre class="code">
+#!/usr/bin/python
+
+import sys;
+
+def generateLongCountToken(id):
+    return "LongValueSum:" + id + "\t" + "1"
+
+def main(argv):
+    line = sys.stdin.readline();
+    try:
+        while line:
+            line = line[:-1];
+            fields = line.split("\t");
+            print generateLongCountToken(fields[0]);
+            line = sys.stdin.readline();
+    except "end of file":
+        return None
+if __name__ == "__main__":
+     main(sys.argv)
+</pre>
+<a name="N101F0"></a><a name="Field+Selection+%28+similar+to+unix+%27cut%27+command%29"></a>
+<h3 class="h4">Field Selection ( similar to unix 'cut' command) </h3>
+<p>
+Hadoop has a library class, org.apache.hadoop.mapred.lib.FieldSelectionMapReduce, that effectively allows you to process text data like the unix "cut" utility. The map function defined in the class treats each input key/value pair as a list of fields. You can specify the field separator (the default is the tab character). You can select an arbitrary list of fields as the map output key, and an arbitrary list of fields as the map output value. Similarly, the reduce function defined in the class treats each input key/value pair as a list of fields. You can select an arbitrary list of fields as the reduce output key, and an arbitrary list of fields as the reduce output value. For example:
+</p>
+<pre class="code">
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
+    -reducer org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
+    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
+    -jobconf map.output.key.field.separator=. \
+    -jobconf num.key.fields.for.partition=2 \
+    -jobconf mapred.data.field.separator=. \
+    -jobconf map.output.key.value.fields.spec=6,5,1-3:0- \
+    -jobconf reduce.output.key.value.fields.spec=0-2:5- \
+    -jobconf mapred.reduce.tasks=12
+</pre>
+<p>
+The option "-jobconf map.output.key.value.fields.spec=6,5,1-3:0-" specifies key/value selection for the map outputs. Key selection spec and value selection spec are separated by ":". In this case, the map output key will consist of fields 6, 5, 1, 2, and 3. The map output value will consist of all fields (0- means field 0 and all 
+the subsequent fields). 
+</p>
+<p>
+The option "-jobconf reduce.output.key.value.fields.spec=0-2:5-" specifies key/value selection for the reduce outputs. In this case, the reduce output key will consist of fields 0, 1, 2 (corresponding to the original fields 6, 5, 1). The reduce output value will consist of all fields starting from field 5 (corresponding to all the original fields).
+</p>
+</div>
+
+
+<a name="N10204"></a><a name="Frequently+Asked+Questions"></a>
+<h2 class="h3">Frequently Asked Questions </h2>
+<div class="section">
+<a name="N1020A"></a><a name="How+do+I+use+Hadoop+Streaming+to+run+an+arbitrary+set+of+%28semi-%29independent+tasks%3F"></a>
+<h3 class="h4">How do I use Hadoop Streaming to run an arbitrary set of (semi-)independent tasks? </h3>
+<p>
+Often you do not need the full power of Map Reduce, but only need to run multiple instances of the same program - either on different parts of the data, or on the same data, but with different parameters. You can use Hadoop Streaming to do this.
+</p>
+<a name="N10214"></a><a name="How+do+I+process+files%2C+one+per+map%3F"></a>
+<h3 class="h4">How do I process files, one per map? </h3>
+<p>
+As an example, consider the problem of zipping (compressing) a set of files across the hadoop cluster. You can achieve this using either of these methods:
+</p>
+<ol>
+
+<li> Hadoop Streaming and custom mapper script:<ul>
+  
+<li> Generate a file containing the full DFS path of the input files. Each map task would get one file name as input.</li>
+  
+<li> Create a mapper script which, given a filename, will get the file to local disk, gzip the file and put it back in the desired output directory</li>
+
+</ul>
+</li>
+
+<li>The existing Hadoop Framework:<ul>
+   
+<li>Add these commands to your main function:
+<pre class="code">
+       OutputFormatBase.setCompressOutput(conf, true);
+       OutputFormatBase.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.GzipCodec.class);
+       conf.setInputFormat(NonSplitableTextInputFormat.class);
+       conf.setNumReduceTasks(0);
+</pre>
+</li>
+   
+<li>Write your map function:
+<pre class="code">
+
+       public void map(WritableComparable key, Writable value, 
+                               OutputCollector output, 
+                               Reporter reporter) throws IOException {
+            output.collect((Text)value, null);
+       }
+</pre>
+</li>
+  
+<li>Note that the output filename will not be the same as the original filename</li>
+
+</ul>
+</li>
+
+</ol>
+<a name="N1023F"></a><a name="How+many+reducers+should+I+use%3F"></a>
+<h3 class="h4">How many reducers should I use? </h3>
+<p>
+See the Hadoop Wiki for details: <a href="http://wiki.apache.org/lucene-hadoop/HowManyMapsAndReduces">http://wiki.apache.org/lucene-hadoop/HowManyMapsAndReduces</a>
+
+</p>
+<a name="N1024D"></a><a name="If+I+set+up+an+alias+in+my+shell+script%2C+will+that+work+after+-mapper%2C+i.e.+say+I+do%3A+alias+c1%3D%27cut+-f1%27.+Will+-mapper+%22c1%22+work%3F"></a>
+<h3 class="h4">If I set up an alias in my shell script, will that work after -mapper, i.e. say I do: alias c1='cut -f1'. Will -mapper "c1" work? </h3>
+<p>
+Using an alias will not work, but variable substitution is allowed as shown in this example:
+</p>
+<pre class="code">
+$ hadoop dfs -cat samples/student_marks
+alice   50
+bruce   70
+charlie 80
+dan     75
+
+$ c2='cut -f2'; $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input /user/me/samples/student_marks \
+    -mapper \"$c2\" -reducer 'cat' \
+    -output /user/me/samples/student_out \
+    -jobconf mapred.job.name='Experiment'
+
+$ hadoop dfs -ls samples/student_out
+Found 1 items
+/user/me/samples/student_out/part-00000    &lt;r 3&gt;   16
+
+$ hadoop dfs -cat samples/student_out/part-00000
+50
+70
+75
+80
+</pre>
+<a name="N1025B"></a><a name="Can+I+use+UNIX+pipes%3F+For+example%2C+will+-mapper+%22cut+-f1+%7C+sed+s%2Ffoo%2Fbar%2Fg%22+work%3F"></a>
+<h3 class="h4">Can I use UNIX pipes? For example, will -mapper "cut -f1 | sed s/foo/bar/g" work?</h3>
+<p>
+Currently this does not work and gives a "java.io.IOException: Broken pipe" error. This is probably a bug that needs to be investigated.
+</p>
+<a name="N10265"></a><a name="When+I+run+a+streaming+job+by"></a>
+<h3 class="h4">When I run a streaming job by distributing large executables (for example, 3.6G) through the -file option, I get a "No space left on device" error. What do I do? </h3>
+<p>
+The jar packaging happens in a directory pointed to by the configuration variable stream.tmpdir. The default value of stream.tmpdir is /tmp. Set the value to a directory with more space:
+</p>
+<pre class="code">
+-jobconf stream.tmpdir=/export/bigspace/...
+</pre>
+<a name="N10276"></a><a name="How+do+I+specify+multiple+input+directories%3F"></a>
+<h3 class="h4">How do I specify multiple input directories? </h3>
+<p>
+You can specify multiple input directories with multiple '-input' options:
+</p>
+<pre class="code">
+ hadoop jar hadoop-streaming.jar -input '/user/foo/dir1' -input '/user/foo/dir2' 
+</pre>
+<a name="N10283"></a><a name="How+do+I+generate+output+files+with+gzip+format%3F"></a>
+<h3 class="h4">How do I generate output files with gzip format? </h3>
+<p>
+Instead of plain text files, you can generate gzip files as your generated output. Pass '-jobconf mapred.output.compress=true -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec' as options to your streaming job.
+</p>
+<a name="N1028D"></a><a name="How+do+I+provide+my+own+input%2Foutput+format+with+streaming%3F"></a>
+<h3 class="h4">How do I provide my own input/output format with streaming? </h3>
+<p>
+At least as late as version 0.14, Hadoop does not support multiple jar files. So, when specifying your own custom classes you will have to pack them along with the streaming jar and use the custom jar instead of the default hadoop streaming jar. 
+</p>
+<a name="N10297"></a><a name="How+do+I+parse+XML+documents+using+streaming%3F"></a>
+<h3 class="h4">How do I parse XML documents using streaming? </h3>
+<p>
+You can use the record reader StreamXmlRecordReader to process XML documents. 
+</p>
+<pre class="code">
+hadoop jar hadoop-streaming.jar -inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" ..... (rest of the command)
+</pre>
+<p>
+Anything found between BEGIN_STRING and END_STRING would be treated as one record for map tasks.
+</p>
+</div>
+
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 347 - 0
docs/streaming.pdf


+ 0 - 275
docs/version_control.html

@@ -1,275 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
-<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
-<meta name="Forrest-skin-name" content="pelt">
-<title>Hadoop Version Control System</title>
-<link type="text/css" href="skin/basic.css" rel="stylesheet">
-<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
-<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
-<link type="text/css" href="skin/profile.css" rel="stylesheet">
-<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
-<link rel="shortcut icon" href="images/favicon.ico">
-</head>
-<body onload="init()">
-<script type="text/javascript">ndeSetTextSize();</script>
-<div id="top">
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/hadoop/">Hadoop</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
-</div>
-<!--+
-    |header
-    +-->
-<div class="header">
-<!--+
-    |start group logo
-    +-->
-<div class="grouplogo">
-<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
-</div>
-<!--+
-    |end group logo
-    +-->
-<!--+
-    |start Project Logo
-    +-->
-<div class="projectlogo">
-<a href="http://lucene.apache.org/hadoop/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
-</div>
-<!--+
-    |end Project Logo
-    +-->
-<!--+
-    |start Search
-    +-->
-<div class="searchbox">
-<form action="http://www.google.com/search" method="get" class="roundtopsmall">
-<input value="lucene.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
-                    <input name="Search" value="Search" type="submit">
-</form>
-</div>
-<!--+
-    |end search
-    +-->
-<!--+
-    |start Tabs
-    +-->
-<ul id="tabs">
-<li class="current">
-<a class="selected" href="index.html">Main</a>
-</li>
-<li>
-<a class="unselected" href="http://wiki.apache.org/lucene-hadoop">Wiki</a>
-</li>
-</ul>
-<!--+
-    |end Tabs
-    +-->
-</div>
-</div>
-<div id="main">
-<div id="publishedStrip">
-<!--+
-    |start Subtabs
-    +-->
-<div id="level2tabs"></div>
-<!--+
-    |end Endtabs
-    +-->
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<!--+
-    |breadtrail
-    +-->
-<div class="breadtrail">
-
-             &nbsp;
-           </div>
-<!--+
-    |start Menu, mainarea
-    +-->
-<!--+
-    |start Menu
-    +-->
-<div id="menu">
-<div onclick="SwitchMenu('menu_1.1', 'skin/')" id="menu_1.1Title" class="menutitle">Project</div>
-<div id="menu_1.1" class="menuitemgroup">
-<div class="menuitem">
-<a href="releases.html">Releases</a>
-</div>
-<div class="menuitem">
-<a href="releases.html#News">News</a>
-</div>
-<div class="menuitem">
-<a href="credits.html">Credits</a>
-</div>
-<div class="menuitem">
-<a href="http://www.cafepress.com/hadoop/">Buy Stuff</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
-<div id="menu_1.2" class="menuitemgroup">
-<div class="menuitem">
-<a href="documentation.html">Overview</a>
-</div>
-<div class="menuitem">
-<a href="quickstart.html">Quickstart</a>
-</div>
-<div class="menuitem">
-<a href="cluster_setup.html">Cluster Setup</a>
-</div>
-<div class="menuitem">
-<a href="hdfs_design.html">HDFS Architecture</a>
-</div>
-<div class="menuitem">
-<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
-</div>
-<div class="menuitem">
-<a href="api/index.html">API Docs</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/">Wiki</a>
-</div>
-<div class="menuitem">
-<a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>
-</div>
-<div class="menuitem">
-<a href="mailing_lists.html#Users">Mailing Lists</a>
-</div>
-</div>
-<div onclick="SwitchMenu('menu_selected_1.3', 'skin/')" id="menu_selected_1.3Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Developers</div>
-<div id="menu_selected_1.3" class="selectedmenuitemgroup" style="display: block;">
-<div class="menuitem">
-<a href="mailing_lists.html#Developers">Mailing Lists</a>
-</div>
-<div class="menuitem">
-<a href="issue_tracking.html">Issue Tracking</a>
-</div>
-<div class="menupage">
-<div class="menupagetitle">Version Control</div>
-</div>
-<div class="menuitem">
-<a href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/">Nightly Build</a>
-</div>
-<div class="menuitem">
-<a href="irc.html">IRC Channel</a>
-</div>
-</div>
-<div id="credit"></div>
-<div id="roundbottom">
-<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
-<!--+
-  |alternative credits
-  +-->
-<div id="credit2"></div>
-</div>
-<!--+
-    |end Menu
-    +-->
-<!--+
-    |start content
-    +-->
-<div id="content">
-<div title="Portable Document Format" class="pdflink">
-<a class="dida" href="version_control.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
-        PDF</a>
-</div>
-<h1>Hadoop Version Control System</h1>
-<div id="minitoc-area">
-<ul class="minitoc">
-<li>
-<a href="#Overview">Overview</a>
-</li>
-<li>
-<a href="#Web+Access+%28read-only%29">Web Access (read-only)</a>
-</li>
-<li>
-<a href="#Anonymous+Access+%28read-only%29">Anonymous Access (read-only)</a>
-</li>
-<li>
-<a href="#Committer+Access+%28read-write%29">Committer Access (read-write)</a>
-</li>
-</ul>
-</div>
-  
-    
-<a name="N1000C"></a><a name="Overview"></a>
-<h2 class="h3">Overview</h2>
-<div class="section">
-<p>
-        The Hadoop source code resides in the Apache <a href="http://subversion.tigris.org/">Subversion (SVN)</a> repository.
-        The command-line SVN client can be obtained <a href="http://subversion.tigris.org/project_packages.html">here</a>.
-        The TortoiseSVN GUI client for Windows can be obtained <a href="http://tortoisesvn.tigris.org/">here</a>. There
-        are also SVN plugins available for both <a href="http://subclipse.tigris.org/">Eclipse</a> and 
-        <a href="http://svnup.tigris.org/">IntelliJ IDEA</a>.
-      </p>
-</div>
-    
-    
-<a name="N1002A"></a><a name="Web+Access+%28read-only%29"></a>
-<h2 class="h3">Web Access (read-only)</h2>
-<div class="section">
-<p>
-        The source code can be browsed via the Web at 
-        <a href="http://svn.apache.org/viewcvs.cgi/lucene/hadoop/">http://svn.apache.org/viewcvs.cgi/lucene/hadoop/</a>.
-        No SVN client software is required.
-      </p>
-</div>
-    
-    
-<a name="N10038"></a><a name="Anonymous+Access+%28read-only%29"></a>
-<h2 class="h3">Anonymous Access (read-only)</h2>
-<div class="section">
-<p>
-        The SVN URL for anonymous users is 
-        <a href="http://svn.apache.org/repos/asf/lucene/hadoop/">http://svn.apache.org/repos/asf/lucene/hadoop/</a>.
-        Instructions for anonymous SVN access are 
-        <a href="http://www.apache.org/dev/version-control.html#anon-svn">here</a>.
-      </p>
-</div>
-    
-    
-<a name="N1004A"></a><a name="Committer+Access+%28read-write%29"></a>
-<h2 class="h3">Committer Access (read-write)</h2>
-<div class="section">
-<p>
-        The SVN URL for committers is 
-        <a href="https://svn.apache.org/repos/asf/lucene/hadoop/">https://svn.apache.org/repos/asf/lucene/hadoop/</a>.
-        Instructions for committer SVN access are 
-        <a href="http://www.apache.org/dev/version-control.html#https-svn">here</a>.
-      </p>
-</div>
-    
-  
-</div>
-<!--+
-    |end content
-    +-->
-<div class="clearboth">&nbsp;</div>
-</div>
-<div id="footer">
-<!--+
-    |start bottomstrip
-    +-->
-<div class="lastmodified">
-<script type="text/javascript"><!--
-document.write("Last Published: " + document.lastModified);
-//  --></script>
-</div>
-<div class="copyright">
-        Copyright &copy;
-         2007 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
-</div>
-<!--+
-    |end bottomstrip
-    +-->
-</div>
-</body>
-</html>

+ 0 - 384
docs/version_control.pdf

@@ -1,384 +0,0 @@
-%PDF-1.3
-%ª«¬­
-4 0 obj
-<< /Type /Info
-/Producer (FOP 0.20.5) >>
-endobj
-5 0 obj
-<< /Length 508 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-Gb!$CZ#51J&;GD#i^P]A[11,0-;gP<92u_76dpU:8WX!KNKYc/?VI3HksE"X2$;Y0pRT&4qPU<:2\1Fn#X_dAfXu58_oa3S]8&X\E`%Wi&B0a^nErMOKp:=(N%'h"4o\N\[ZZVEb[dK6gt9QgZ.5Qi_$?cs>sAh[h!_6Lis_%m3]0L8j[PUEb\[*\[,6.1?8T7,e71c5FZk>U.EDc%q5GETeb#?B5OPPDp-S-sE>V"!&cL\R\Dn,(Aq<a5R=bLe1Bee2G1:G>BkKQMF4%ES?MI-'`E5$fj'm,RWH;3LaU+U=Ln?AScU>T'^S@AWZ8pKVfD^s_)"RG<jC&[pprj?\P.U"IXQrJoTa&ULWm]l25CiRoUD%ubg$_l.ZhL?R/dUTq1C3jc@RM(^!N#PW9oZ'ubEO]<.1_hVBnS4*6k#4YZ-*E]7oc\Bg7$TE11DJc0t1#k(,hOrZeUOIA>p*-E\_^HCPkf0-Ae7&)oj-!hQ7HL[W<Qg+H-4&OBsd(~>
-endstream
-endobj
-6 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 5 0 R
-/Annots 7 0 R
->>
-endobj
-7 0 obj
-[
-8 0 R
-10 0 R
-12 0 R
-14 0 R
-]
-endobj
-8 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 559.666 157.316 547.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 9 0 R
-/H /I
->>
-endobj
-10 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 541.466 226.616 529.466 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 11 0 R
-/H /I
->>
-endobj
-12 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 523.266 262.628 511.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 13 0 R
-/H /I
->>
-endobj
-14 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 102.0 505.066 259.292 493.066 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A 15 0 R
-/H /I
->>
-endobj
-16 0 obj
-<< /Length 1470 /Filter [ /ASCII85Decode /FlateDecode ]
- >>
-stream
-GatU3cZbj<&AJ%FcnROK7R@`l[ksRde$lNmgUQXEklG;5junZ.KUq<+IRepM85)@uQV1pQRNJ.cbmY[0Zh_WI&V^CpGaJYJ4X2A*5D#$uFFo46KE<a6kf4RWkNIM<UHeY=+KIj^+4&>1[.lJi?Hdl78TX`?et*9fNeKa"Z+ghNN_q+MEq\DWK*f[4Tl&.b5Y<_gg3AO8F92D6=%_kMXOY&-]\bNQnt\nCHmQ1!jJDagpAJ=cd`fe8N&DXIjT`sr&n:r&"dhbmMo(92Ys;+uO3f,_CMW4\<GU*NAC%lVme0:=:?SVs$gG\c+o)KhT-*4p@2;7u-Zo!*T4W;kV0RQ6p[.SNH'b:%$V#`nVmEPe:?fDle^7mJU.9?N_*MmXC!L:)]1pK!7?NqnNBh%C>)QXO[3qm/74u8PC0@ImAi0W?9d1QDn-B7.X<bXhVcGP60!,AFr^KrZ"@CDC1OY-R0;tmO\X%l:Dnb(g"7Vs5-[a7`Q$UrXE97HcQ*iTd'/UnB*"iUm66$rs80$YX0WUQBS+GV0)$/$.GG[ac.PLO<+;A"=0PFPSf6QVgK(P/e7Ck&2#cLBrDaj_8n=C:(&&oKa/:IsJ]aIp%2;I^sT0e5$W:TJZiPCIa(Vt0RKM.BD>><oO9hMV`i?I-E[4I45dn&,WDK11q6P7f<\?Lk<T$s5S-F#JOT@]F-8C@<UVRWu(8(Z*@-^Y+F[AG"#^<<A)qQik1Nu7W<aO8Um.Z7foqOS3Uc6uFSC'P]:3AtHQELL9Q-g@9/]KF;*hsRe%QZ&Iu<?1ROAB#(Y:?r_bOmlu@6]7`P(@[sH9OU`[6K$3k\uG#O+7m$,PW3M,/QUDJDDm::TQ.EckJ]WKV4;Imq*%$n:]!OB^YYN1*dd]Y>GkL4p#D]^jPK9*o%-(/jQ]`cYo'@7Ltb$Y09OFUQ,g#\leeP+H%]T=^Qi8%5e<9S",[Oqq>04AOj9:W04:tl>8[KcHG2iT(=9%JTK%ZGT_S^6(.@e`8MdgLi`1#f6I!e1:\DWk"EKoT?Lb"gB`O3r`GMU?C7j%\X6Jl!%A6Ig>^oq:DOQY3NM1@s#W2=11rK^d'pI3ClbkGdCeO$JqkgPq*u7E\ZIYIieA$T00!B%n]h87D.^.r,gaCJ3"a_coiON?.+6@jZT@jPuYoS9P6Rd\ZO?uq!"G"#sk.9):@XIR_bMBC.rtaEr.\>CYF:Y(/;/7n;mb.29IOEsr12WBs`^$j7^h.@o]jg7rhE(2LG\JU>6W6L!M?#,u;hjK:&3V6,rC`@*hL0uBInh1RdFgJU/a)B$<*<;&)!LCJ\sD(o)!dLW"?o*qhXMW#e$u+KK;CYTRMgm>_DjpiHsT<K@fN<@3=&-gb"0m-gZ+YjN;KXjkHJlG"HX9h>$"d?GHCB.VAYk#gcSiaM3DTa6r*#R=tA6$Be/(do=!1s<;U^$)N%tl=A_&OM6/pD_>lX,W=7GX&&&Lj49~>
-endstream
-endobj
-17 0 obj
-<< /Type /Page
-/Parent 1 0 R
-/MediaBox [ 0 0 612 792 ]
-/Resources 3 0 R
-/Contents 16 0 R
-/Annots 18 0 R
->>
-endobj
-18 0 obj
-[
-19 0 R
-20 0 R
-21 0 R
-22 0 R
-23 0 R
-24 0 R
-25 0 R
-26 0 R
-27 0 R
-28 0 R
-29 0 R
-]
-endobj
-19 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 318.6 629.666 407.592 617.666 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://subversion.tigris.org/)
-/S /URI >>
-/H /I
->>
-endobj
-20 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 297.3 616.466 317.952 604.466 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://subversion.tigris.org/project_packages.html)
-/S /URI >>
-/H /I
->>
-endobj
-21 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 168.312 603.266 188.964 591.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://tortoisesvn.tigris.org/)
-/S /URI >>
-/H /I
->>
-endobj
-22 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 418.92 603.266 454.248 591.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://subclipse.tigris.org/)
-/S /URI >>
-/H /I
->>
-endobj
-23 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 477.576 603.266 510.912 591.266 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://svnup.tigris.org/)
-/S /URI >>
-/H /I
->>
-endobj
-24 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 90.0 590.066 118.656 578.066 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://svnup.tigris.org/)
-/S /URI >>
-/H /I
->>
-endobj
-25 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 90.0 524.532 327.636 512.532 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://svn.apache.org/viewcvs.cgi/lucene/hadoop/)
-/S /URI >>
-/H /I
->>
-endobj
-26 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 279.648 472.198 503.616 460.198 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://svn.apache.org/repos/asf/lucene/hadoop/)
-/S /URI >>
-/H /I
->>
-endobj
-27 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 303.288 458.998 323.94 446.998 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://www.apache.org/dev/version-control.html#anon-svn)
-/S /URI >>
-/H /I
->>
-endobj
-28 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 250.656 406.664 479.292 394.664 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (https://svn.apache.org/repos/asf/lucene/hadoop/)
-/S /URI >>
-/H /I
->>
-endobj
-29 0 obj
-<< /Type /Annot
-/Subtype /Link
-/Rect [ 297.288 393.464 317.94 381.464 ]
-/C [ 0 0 0 ]
-/Border [ 0 0 0 ]
-/A << /URI (http://www.apache.org/dev/version-control.html#https-svn)
-/S /URI >>
-/H /I
->>
-endobj
-31 0 obj
-<<
- /Title (\376\377\0\61\0\40\0\117\0\166\0\145\0\162\0\166\0\151\0\145\0\167)
- /Parent 30 0 R
- /Next 32 0 R
- /A 9 0 R
->> endobj
-32 0 obj
-<<
- /Title (\376\377\0\62\0\40\0\127\0\145\0\142\0\40\0\101\0\143\0\143\0\145\0\163\0\163\0\40\0\50\0\162\0\145\0\141\0\144\0\55\0\157\0\156\0\154\0\171\0\51)
- /Parent 30 0 R
- /Prev 31 0 R
- /Next 33 0 R
- /A 11 0 R
->> endobj
-33 0 obj
-<<
- /Title (\376\377\0\63\0\40\0\101\0\156\0\157\0\156\0\171\0\155\0\157\0\165\0\163\0\40\0\101\0\143\0\143\0\145\0\163\0\163\0\40\0\50\0\162\0\145\0\141\0\144\0\55\0\157\0\156\0\154\0\171\0\51)
- /Parent 30 0 R
- /Prev 32 0 R
- /Next 34 0 R
- /A 13 0 R
->> endobj
-34 0 obj
-<<
- /Title (\376\377\0\64\0\40\0\103\0\157\0\155\0\155\0\151\0\164\0\164\0\145\0\162\0\40\0\101\0\143\0\143\0\145\0\163\0\163\0\40\0\50\0\162\0\145\0\141\0\144\0\55\0\167\0\162\0\151\0\164\0\145\0\51)
- /Parent 30 0 R
- /Prev 33 0 R
- /A 15 0 R
->> endobj
-35 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F3
-/BaseFont /Helvetica-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-36 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F5
-/BaseFont /Times-Roman
-/Encoding /WinAnsiEncoding >>
-endobj
-37 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F1
-/BaseFont /Helvetica
-/Encoding /WinAnsiEncoding >>
-endobj
-38 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F2
-/BaseFont /Helvetica-Oblique
-/Encoding /WinAnsiEncoding >>
-endobj
-39 0 obj
-<< /Type /Font
-/Subtype /Type1
-/Name /F7
-/BaseFont /Times-Bold
-/Encoding /WinAnsiEncoding >>
-endobj
-1 0 obj
-<< /Type /Pages
-/Count 2
-/Kids [6 0 R 17 0 R ] >>
-endobj
-2 0 obj
-<< /Type /Catalog
-/Pages 1 0 R
- /Outlines 30 0 R
- /PageMode /UseOutlines
- >>
-endobj
-3 0 obj
-<< 
-/Font << /F3 35 0 R /F5 36 0 R /F1 37 0 R /F2 38 0 R /F7 39 0 R >> 
-/ProcSet [ /PDF /ImageC /Text ] >> 
-endobj
-9 0 obj
-<<
-/S /GoTo
-/D [17 0 R /XYZ 85.0 659.0 null]
->>
-endobj
-11 0 obj
-<<
-/S /GoTo
-/D [17 0 R /XYZ 85.0 567.066 null]
->>
-endobj
-13 0 obj
-<<
-/S /GoTo
-/D [17 0 R /XYZ 85.0 501.532 null]
->>
-endobj
-15 0 obj
-<<
-/S /GoTo
-/D [17 0 R /XYZ 85.0 435.998 null]
->>
-endobj
-30 0 obj
-<<
- /First 31 0 R
- /Last 34 0 R
->> endobj
-xref
-0 40
-0000000000 65535 f 
-0000006741 00000 n 
-0000006806 00000 n 
-0000006898 00000 n 
-0000000015 00000 n 
-0000000071 00000 n 
-0000000670 00000 n 
-0000000790 00000 n 
-0000000836 00000 n 
-0000007021 00000 n 
-0000000971 00000 n 
-0000007084 00000 n 
-0000001108 00000 n 
-0000007150 00000 n 
-0000001245 00000 n 
-0000007216 00000 n 
-0000001382 00000 n 
-0000002945 00000 n 
-0000003068 00000 n 
-0000003165 00000 n 
-0000003346 00000 n 
-0000003548 00000 n 
-0000003732 00000 n 
-0000003913 00000 n 
-0000004091 00000 n 
-0000004266 00000 n 
-0000004465 00000 n 
-0000004665 00000 n 
-0000004873 00000 n 
-0000005074 00000 n 
-0000007282 00000 n 
-0000005283 00000 n 
-0000005422 00000 n 
-0000005655 00000 n 
-0000005924 00000 n 
-0000006185 00000 n 
-0000006298 00000 n 
-0000006408 00000 n 
-0000006516 00000 n 
-0000006632 00000 n 
-trailer
-<<
-/Size 40
-/Root 2 0 R
-/Info 4 0 R
->>
-startxref
-7333
-%%EOF

+ 0 - 1
src/docs/src/documentation/content/.htaccess

@@ -1 +0,0 @@
-RedirectMatch Permanent ^/hadoop/about(.*) http://lucene.apache.org/hadoop/index$1

+ 0 - 32
src/docs/src/documentation/content/xdocs/credits.xml

@@ -1,32 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" 
-          "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document> 
-
-<header>
-  <title>Hadoop credits</title> 
-</header> 
-
-<body>
-
-<section>
-<title>Committers</title>
-<ul>
-  <li>Andrzej Bialecki</li>
-  <li>Mike Cafarella</li>
-  <li><a href="http://blog.lucene.com/">Doug Cutting</a></li>
-  <li><a href="http://people.apache.org/~nigel">Nigel Daley</a></li>
-  <li>Jim Kellerman (contrib/hbase)</li>
-  <li><a href="http://people.apache.org/~omalley">Owen O'Malley</a></li>
-  <li><a href="http://weblogs.java.net/blog/tomwhite/">Tom White</a></li>
-  <li><a href="http://people.apache.org/~acmurthy">Arun C Murthy</a></li>
-  <li><a href="http://people.apache.org/~ddas">Devaraj Das</a></li>
-  <li><a href="http://people.apache.org/~enis">Enis Soztutar</a></li>
-  <li><a href="http://people.apache.org/~taton">Christophe Taton</a></li>
-</ul>
-</section>
-
-</body>
-</document>

+ 0 - 28
src/docs/src/documentation/content/xdocs/documentation.xml

@@ -1,28 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document>
-  
-  <header>
-    <title>Hadoop Documentation</title>
-  </header>
-  
-  <body>
-    <p>
-    The following documents provide concepts and procedures that will help you 
-    get started using Hadoop. If you have more questions, you can ask the 
-    <a href="mailing_lists.html">mailing list</a> or browse the archives.
-    </p>
-    <ul>
-      <li><a href="quickstart.html">Hadoop Quickstart</a></li>
-      <li><a href="cluster_setup.html">Hadoop Cluster Setup</a></li>
-      <li><a href="hdfs_design.html">Hadoop Distributed File System</a></li>
-      <li><a href="mapred_tutorial.html">Hadoop Map-Reduce Tutorial</a></li>
-      <li><a href="ext:api/index">API Docs</a></li>
-      <li><a href="ext:wiki">Wiki</a></li>
-      <li><a href="ext:faq">FAQ</a></li>
-    </ul>
-  </body>
-  
-</document>

+ 20 - 79
src/docs/src/documentation/content/xdocs/index.xml

@@ -1,87 +1,28 @@
 <?xml version="1.0"?>
 <?xml version="1.0"?>
 
 
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" 
-          "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document> 
-
-  <header> 
-    <title>Welcome to Hadoop!</title> 
-  </header> 
-
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop Documentation</title>
+  </header>
+  
   <body>
   <body>
     <p>
     <p>
-    Hadoop is a software platform that lets one easily write and run
-    applications that process vast amounts of data.</p>
-
-    <p>Here's what makes Hadoop especially useful:</p>
+    The following documents provide concepts and procedures that will help you 
+    get started using Hadoop. If you have more questions, you can ask the 
+    <a href="ext:lists">mailing list</a> or browse the archives.
+    </p>
     <ul>
     <ul>
-      <li><strong>Scalable:</strong>
-      Hadoop can reliably store and process petabytes.</li>
-      <li><strong>Economical:</strong>
-      It distributes the data and processing across clusters of
-      commonly available computers. These clusters can number into the
-      thousands of nodes.</li>
-      <li><strong>Efficient:</strong>
-      By distributing the data, Hadoop can process it in parallel on
-      the nodes where the data is located. This makes it extremely
-      rapid.</li>
-      <li><strong>Reliable:</strong>
-      Hadoop automatically maintains multiple copies of data and
-      automatically redeploys computing tasks based on failures.</li>
+      <li><a href="quickstart.html">Hadoop Quickstart</a></li>
+      <li><a href="cluster_setup.html">Hadoop Cluster Setup</a></li>
+      <li><a href="hdfs_design.html">Hadoop Distributed File System</a></li>
+      <li><a href="mapred_tutorial.html">Hadoop Map-Reduce Tutorial</a></li>
+      <li><a href="ext:api/index">API Docs</a></li>
+      <li><a href="ext:wiki">Wiki</a></li>
+      <li><a href="ext:faq">FAQ</a></li>
     </ul>
     </ul>
-    <p>
-    Hadoop implements <a
-    href="http://wiki.apache.org/lucene-hadoop/HadoopMapReduce">MapReduce</a>,
-    using the Hadoop Distributed File System (<a
-    href="hdfs_design.html"><acronym title="Hadoop Distributed File
-    System">HDFS</acronym></a>) (see figure below.)  MapReduce divides
-    applications into many small blocks of work.  HDFS creates
-    multiple replicas of data blocks for reliability, placing them on
-    compute nodes around the cluster.  MapReduce can then process the
-    data where it is located.
-    </p>
-
-    <p>Hadoop has been demonstrated on clusters with 2000 nodes.
-    The current design target is 10,000 node clusters.</p>
-
-    <p>Hadoop is a <a href="ext:lucene">Lucene</a> sub-project
-    that contains the distributed computing platform that was
-    formerly a part of <a href="ext:nutch">Nutch</a>. 
-    </p>
-
-    <p>For more information about Hadoop, please see the <a
-    href="ext:wiki">Hadoop wiki.</a></p>     
-
-
-    <figure alt="architecture" src="images/architecture.gif" />
-
-    <section>
-      <title> Getting Started </title>
-      <p>
-      The Hadoop project plans to scale Hadoop up to handling thousands of computers. However, to begin with you can start by installing in on a single machine or a very small cluster.
-      </p>
-      <ol>
-        <li><a href="documentation.html">Learn about</a> Hadoop by reading the documentation.</li>
-        <li><a href="releases.html">Download</a> Hadoop from the release page.</li>
-        <li>Hadoop <a href="quickstart.html">Quickstart</a>.</li>
-        <li><a href="cluster_setup.html">Hadoop Cluster Setup</a>.</li>
-        <li><a href="mailing_lists.html">Discuss it</a> on the mailing list.</li>
-      </ol>
-    </section>
-
-    <section>
-      <title> Getting Involved </title>
-      <p>
-      Hadoop is an open source volunteer project under the Apache Software Foundation. We encourage you to learn about the project and contribute your expertise. Here are some starter links:
-      </p>
-      <ol>
-        <li>See our <a href="http://wiki.apache.org/lucene-hadoop/HowToContribute">How to Contribute to Hadoop</a> page.</li>
-        <li>Give us <a href="issue_tracking.html">feedback</a>: What can we do better?</li>
-        <li>Join the <a href="mailing_lists.html">mailing list</a>: Meet the community.</li>
-      </ol>
-    </section>
-
   </body>
   </body>
-
+  
 </document>
 </document>

+ 0 - 23
src/docs/src/documentation/content/xdocs/irc.xml

@@ -1,23 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" 
-          "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document>
-  
-  <header>
-    <title>Hadoop IRC Channel</title>
-  </header>
-  
-  <body>
-
-    <p>There is an IRC channel dedicated to hadoop at <strong>irc.freenode.org</strong>. 
-    The name of the channel is <strong>#hadoop</strong>.</p> 
-    
-    <p>
-      The IRC channel can be used for online discussion about hadoop related stuff, but developers should be careful to transfer all the official decisions or useful discussions to the issue tracking system.
-    </p>  
-
-  </body>
-  
-</document>

+ 0 - 22
src/docs/src/documentation/content/xdocs/issue_tracking.xml

@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document>
-  
-  <header>
-    <title>Hadoop Issue Tracking</title>
-  </header>
-  
-  <body>
-    <p>
-    Hadoop tracks both bugs and enhancement requests <a href="http://issues.apache.org/jira/browse/HADOOP">here</a> using Apache JIRA.
-    We welcome input, however, <strong>before filing a request,</strong> please make sure you do the following:
-    </p>
-    <ul>
-      <li>Search the JIRA database.</li>
-      <li>Check the user <a href="mailing_lists.html#Users">mailing list</a>, both by searching the archives and by asking questions.</li>
-    </ul>
-  </body>
-  
-</document>

+ 0 - 72
src/docs/src/documentation/content/xdocs/mailing_lists.xml

@@ -1,72 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" 
-          "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document>
-  
-  <header>
-    <title>Hadoop Mailing Lists</title>
-  </header>
-  
-  <body>
-  
-    <section>
-      <title>Users</title>
-
-      <p>If you use Hadoop, please subscribe to the Hadoop user mailing list.</p>
-
-      <p>
-        The Hadoop user mailing list is :
-        <a href="mailto:hadoop-user@lucene.apache.org">hadoop-user@lucene.apache.org</a>.
-      </p>      
-      <ul>
-        <li><a href="mailto:hadoop-user-subscribe@lucene.apache.org">Subscribe to List</a></li>
-        <li><a href="mailto:hadoop-user-unsubscribe@lucene.apache.org">Unsubscribe from List</a></li>
-        <li>Search List Archive on <a href="http://www.mail-archive.com/hadoop-user%40lucene.apache.org/">The Mail Archive</a>,
-        <a href="http://www.nabble.com/Hadoop-Users-f17067.html">Nabble</a>,
-        or <a href="http://dir.gmane.org/gmane.comp.jakarta.lucene.hadoop.user">Gmane</a></li>
-        <li><a href="http://mail-archives.apache.org/mod_mbox/lucene-hadoop-user/">View List Archive</a> (<a href="http://lucene.apache.org/mail/hadoop-user/">Raw files</a>)</li>
-      </ul>
-      <note>In order to post to the list, it is necessary to first subscribe to it.</note>
-    </section>
-  
-    <section>
-      <title>Developers</title>
-
-      <p>If you'd like to contribute to Hadoop, please subscribe to the
-      Hadoop developer mailing list.</p>
-
-      <p>
-        The Hadoop developer mailing list is :
-        <a href="mailto:hadoop-dev@lucene.apache.org">hadoop-dev@lucene.apache.org</a>.
-      </p>      
-      <ul>
-        <li><a href="mailto:hadoop-dev-subscribe@lucene.apache.org">Subscribe to List</a></li>
-        <li><a href="mailto:hadoop-dev-unsubscribe@lucene.apache.org">Unsubscribe from List</a></li>
-        <li>Search List Archive on <a href="http://www.mail-archive.com/hadoop-dev%40lucene.apache.org/">The Mail Archive</a>,
-        <a href="http://www.nabble.com/Hadoop-Dev-f17068.html">Nabble</a>,
-        or <a href="http://dir.gmane.org/gmane.comp.jakarta.lucene.hadoop.devel">Gmane</a></li>
-        <li><a href="http://mail-archives.apache.org/mod_mbox/lucene-hadoop-dev/">View List Archive</a> (<a href="http://lucene.apache.org/mail/hadoop-dev/">Raw files</a>)</li>
-      </ul>
-      <note>In order to post to the list, it is necessary to first subscribe to it.</note>
-    </section>
-  
-    <section>
-      <title>Commits</title>
-
-      <p>If you'd like to see changes made in Hadoop's <a
-      href="version_control.html">version control system</a>
-      then subscribe to the Hadoop commit mailing list.</p>
-
-      <ul>
-        <li><a href="mailto:hadoop-commits-subscribe@lucene.apache.org">Subscribe to List</a></li>
-        <li><a href="mailto:hadoop-commits-unsubscribe@lucene.apache.org">Unsubscribe from List</a></li>
-        <li>Search List Archive on <a href="http://www.mail-archive.com/hadoop-commits%40lucene.apache.org/">The Mail Archive</a></li>        
-        <li><a href="http://mail-archives.apache.org/mod_mbox/lucene-hadoop-commits/">View List Archive</a> (<a href="http://lucene.apache.org/mail/hadoop-commits/">Raw files</a>)</li>
-      </ul>
-    </section>
-  
-  </body>
-  
-</document>

+ 1 - 1
src/docs/src/documentation/content/xdocs/quickstart.xml

@@ -93,7 +93,7 @@
       
       
       <p>
       <p>
         First, you need to get a Hadoop distribution: download a recent 
         First, you need to get a Hadoop distribution: download a recent 
-        <a href="releases.html">stable release</a> and unpack it.
+        <a href="ext:releases">stable release</a> and unpack it.
       </p>
       </p>
 
 
       <p>
       <p>

+ 0 - 102
src/docs/src/documentation/content/xdocs/releases.xml

@@ -1,102 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" 
-          "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document> 
-
-  <header> 
-    <title>Releases</title> 
-  </header> 
-
-  <body> 
-
-    <section>
-      <title>Download</title>
-
-      <p>Releases may be downloaded from Apache mirrors.</p>
-
-      <p>
-      <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
-      <strong><strong>Download a release now!</strong></strong></a>
-      </p>
-
-      <p>On the mirror, all recent releases are available, but are not
-      guaranteed to be stable. For stable releases, look in the stable
-      directory.
-      </p>
-
-    </section>
-
-    <section>
-      <title>Release Notes</title>
-
-      <p>Release notes for Hadoop releases are available in Jira.</p>
-
-      <p>
-      <a href="http://issues.apache.org/jira/browse/HADOOP?report=com.atlassian.jira.plugin.system.project:changelog-panel">
-      <strong><strong>Browse release notes now!</strong></strong></a>
-      </p>
-
-    </section>
-
-    <section>
-      <title>News</title>
-
-      <section>
-        <title>27 November, 2007: release 0.15.1 available </title>
-        <p>This release fixes critical bugs in release 0.15.0.</p>
-      </section>
-
-      <section>
-        <title>26 November, 2007: release 0.14.4 available </title>
-        <p>This release fixes critical bugs in release 0.14.3.</p>
-      </section>
-
-      <section>
-        <title>29 October 2007: release 0.15.0 available </title>
-	<p>This release contains my improvements, new features, bug
-      fixes and optimizations.  See the release notes (above) for
-      details.</p>
-      </section>
-
-      <section>
-        <title>19 October, 2007: release 0.14.3 available </title>
-	<p>This release fixes critical bugs in release 0.14.2.</p>
-      </section>
-
-      <section>
-        <title> 4 September, 2007: release 0.14.1 available </title>
-	<p>New features in release 0.14 include:</p>
-
-	<ul>
-	  <li>Better checksums in HDFS.  Checksums are no longer
-	  stored in parallel HDFS files, but are stored directly by
-	  datanodes alongside blocks.  This is more efficient for the
-	  namenode and also improves data integrity.</li>
-
-	  <li>Pipes: A C++ API for MapReduce</li>
-
-	  <li>Eclipse Plugin, including HDFS browsing, job
-	  monitoring, etc.</li>
-
-	  <li>File modification times in HDFS.</li>
-	</ul>
-
-	<p>There are many other improvements, bug fixes, optimizations
-	and new features.  Performance and reliability are better than
-	ever.</p>
-
-	<note>When upgrading an existing HDFS filesystem to a 0.14.x
-	release from a 0.13.x or earlier release, you should first
-	start HDFS with 'bin/start-dfs.sh -upgrade'.  See the <a
-	href="http://wiki.apache.org/lucene-hadoop/Hadoop_0.14_Upgrade">Hadoop
-	0.14 Upgrade</a> page for details.</note>
-
-      </section>
-     
-    </section>
-
-  </body>
-
-</document>

+ 7 - 20
src/docs/src/documentation/content/xdocs/site.xml

@@ -16,39 +16,26 @@ See http://forrest.apache.org/docs/linking.html for more info.
 
 
 <site label="Hadoop" href="" xmlns="http://apache.org/forrest/linkmap/1.0">
 <site label="Hadoop" href="" xmlns="http://apache.org/forrest/linkmap/1.0">
 
 
-  <project label="Project">
-    <releases  label="Releases"           href="releases.html" />
-    <news      label="News"               href="releases.html#News" />
-    <credits   label="Credits"            href="credits.html" /> 
-    <store     label="Buy Stuff"          href="ext:store" />    
-  </project>
-
   <docs label="Documentation"> 
   <docs label="Documentation"> 
-    <overview  label="Overview"           href="documentation.html" />
+    <overview  label="Overview"           href="index.html" />
     <quickstart label="Quickstart"        href="quickstart.html" />
     <quickstart label="Quickstart"        href="quickstart.html" />
     <setup     label="Cluster Setup"      href="cluster_setup.html" />
     <setup     label="Cluster Setup"      href="cluster_setup.html" />
     <hdfs      label="HDFS Architecture"  href="hdfs_design.html" />
     <hdfs      label="HDFS Architecture"  href="hdfs_design.html" />
     <mapred    label="Map-Reduce Tutorial" href="mapred_tutorial.html" />
     <mapred    label="Map-Reduce Tutorial" href="mapred_tutorial.html" />
+    <streaming label="Streaming"          href="streaming.html" />
     <api       label="API Docs"           href="ext:api/index" />
     <api       label="API Docs"           href="ext:api/index" />
     <wiki      label="Wiki"               href="ext:wiki" />
     <wiki      label="Wiki"               href="ext:wiki" />
     <faq       label="FAQ"                href="ext:faq" />
     <faq       label="FAQ"                href="ext:faq" />
-    <usermail  label="Mailing Lists"      href="mailing_lists.html#Users" />
+    <lists     label="Mailing Lists"      href="ext:lists" />
   </docs>
   </docs>
 
 
-  <resources label="Developers">
-    <devmail   label="Mailing Lists"      href="mailing_lists.html#Developers" />
-    <issues    label="Issue Tracking"     href="issue_tracking.html" />
-    <vcs       label="Version Control"    href="version_control.html" />
-    <nightly   label="Nightly Build"      href="ext:nightly" />
-    <irc       label="IRC Channel"        href="irc.html" />
-  </resources>
-
   <external-refs>
   <external-refs>
+    <site      href="http://lucene.apache.org/hadoop/"/>
+    <lists     href="http://lucene.apache.org/hadoop/mailing_lists.html"/>
+    <releases  href="http://lucene.apache.org/hadoop/releases.html"/>
+    <jira      href="http://lucene.apache.org/hadoop/issue_tracking.html"/>
     <wiki      href="http://wiki.apache.org/lucene-hadoop/" />
     <wiki      href="http://wiki.apache.org/lucene-hadoop/" />
     <faq       href="http://wiki.apache.org/lucene-hadoop/FAQ" />
     <faq       href="http://wiki.apache.org/lucene-hadoop/FAQ" />
-    <nightly   href="http://lucene.zones.apache.org:8080/hudson/job/Hadoop-Nightly/" />
-    <releases  href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/" />
-    <store     href="http://www.cafepress.com/hadoop/" />
     <lucene    href="http://lucene.apache.org/" />
     <lucene    href="http://lucene.apache.org/" />
     <nutch     href="http://lucene.apache.org/nutch/" />
     <nutch     href="http://lucene.apache.org/nutch/" />
     <hadoop-default href="http://lucene.apache.org/hadoop/hadoop-default.html" />
     <hadoop-default href="http://lucene.apache.org/hadoop/hadoop-default.html" />

+ 542 - 0
src/docs/src/documentation/content/xdocs/streaming.xml

@@ -0,0 +1,542 @@
+<?xml version="1.0"?>
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+<header>
+<title>Hadoop Streaming</title>
+<meta name="http-equiv">Content-Type</meta>
+<meta name="content">text/html;</meta>
+<meta name="charset">utf-8</meta>
+</header>
+<body>
+<section>
+<title>Hadoop Streaming</title>
+
+<p>
+Hadoop streaming is a utility that comes with the Hadoop distribution. The utility allows you to create and run map/reduce jobs with any executable or script as the mapper and/or the reducer. For example:
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper /bin/cat \
+    -reducer /bin/wc
+</source>
+</section>
+
+<section>
+<title>How Does Streaming Work </title>
+<p>
+In the above example, both the mapper and the reducer are executables that read the input from stdin (line by line) and emit the output to stdout. The utility will create a map/reduce job, submit the job to an appropriate cluster, and monitor the progress of the job until it completes.
+</p><p>
+  When an executable is specified for mappers, each mapper task will launch the executable as a separate process when the mapper is initialized. As the mapper task runs, it converts its inputs into lines and feeds the lines to the stdin of the process. In the meantime, the mapper collects the line oriented outputs from the stdout of the process and converts each line into a key/value pair, which is collected as the output of the mapper. By default, the 
+  <em>prefix of a line up to the first tab character</em> is the <strong>key</strong> and the rest of the line (excluding the tab character) will be the <strong>value</strong>. However, this can be customized, as <a href="#Customizing_the_Way_to_Split_Lin">discussed later</a>.
+</p>
+<p>
+When an executable is specified for reducers, each reducer task will launch the executable as a separate process when the reducer is initialized. As the reducer task runs, it converts its input key/value pairs into lines and feeds the lines to the stdin of the process. In the meantime, the reducer collects the line oriented outputs from the stdout of the process and converts each line into a key/value pair, which is collected as the output of the reducer. By default, the prefix of a line up to the first tab character is the key and the rest of the line (excluding the tab character) is the value. However, this can be customized, as <a href="#Customizing_the_Way_to_Split_Lin">discussed later</a>.
+</p><p>
+This is the basis for the communication protocol between the map/reduce framework and the streaming mapper/reducer.
+</p><p>
+You can supply a Java class as the mapper and/or the reducer. The above example is equivalent to:
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer /bin/wc
+</source>
+</section>
+
+<section>
+<title>Package Files With Job Submissions</title>
+<p>
+You can specify any executable as the mapper and/or the reducer. The executables do not need to pre-exist on the machines in the cluster; however, if they don't, you will need to use the "-file" option to tell the framework to pack your executable files as a part of job submission. For example:
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myPythonScript.py \
+    -reducer /bin/wc \
+    -file myPythonScript.py 
+</source>
+<p>
+The above example specifies a user defined Python executable as the mapper. The option "-file myPythonScript.py" causes the Python executable to be shipped to the cluster machines as a part of job submission.
+</p>
+<p>
+In addition to executable files, you can also package other auxiliary files (such as dictionaries, configuration files, etc) that may be used by the mapper and/or the reducer. For example:
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myPythonScript.py \
+    -reducer /bin/wc \
+    -file myPythonScript.py \
+    -file myDictionary.txt
+</source>
+</section>
+
+<section>
+<title>Streaming Options and Usage </title>
+
+<section>
+<title>Mapper-Only Jobs </title>
+<p>
+Often, you may want to process input data using a map function only. To do this, simply set mapred.reduce.tasks to zero. The map/reduce framework will not create any reducer tasks. Rather, the outputs of the mapper tasks will be the final output of the job.
+</p><p>
+To be backward compatible, Hadoop Streaming also supports the "-reducer NONE" option, which is equivalent to "-jobconf mapred.reduce.tasks=0".
+</p>
+</section>
+
+<section>
+<title>Specifying Other Plugins for Jobs </title>
+<p>
+Just as with a normal map/reduce job, you can specify other plugins for a streaming job:
+</p>
+<source>
+   -inputformat JavaClassName
+   -outputformat JavaClassName
+   -partitioner JavaClassName
+   -combiner JavaClassName
+</source>
+<p>
+The class you supply for the input format should return key/value pairs of Text class. If you do not specify an input format class, the TextInputFormat is used as the default. Since the TextInputFormat returns keys of LongWritable class, which are actually not part of the input data, the keys will be discarded; only the values will be piped to the streaming mapper.
+</p><p>
+The class you supply for the output format is expected to take key/value pairs of Text class. If you do not specify an output format class, the TextOutputFormat is used as the default.
+</p>
+</section>
+
+<section>
+<title>Large files and archives in Hadoop Streaming </title>
+
+<p>
+The -cacheFile and -cacheArchive options allow you to make files and archives available to the tasks. The argument is a URI to the file or archive that you have already uploaded to HDFS. These files and archives are cached across jobs. You can retrieve the host and fs_port values from the fs.default.name config variable.
+</p>
+<p>
+Here are examples of the -cacheFile option:
+</p> 
+<source>
+-cacheFile hdfs://host:fs_port/user/testfile.txt#testlink
+</source>
+<p>
+In the above example, the part of the url after # is used as the symlink name that is created in the current working directory of tasks. So the tasks will have a symlink called testlink in the cwd that points to a local copy of testfile.txt. Multiple entries can be specified as: 
+</p>
+<source>
+-cacheFile hdfs://host:fs_port/user/testfile1.txt#testlink1 -cacheFile hdfs://host:fs_port/user/testfile2.txt#testlink2
+</source>
+<p>
+The -cacheArchive option allows you to copy jars locally to the cwd of tasks and automatically unjar the files. For example:
+</p>
+<source>
+-cacheArchive hdfs://host:fs_port/user/testfile.jar#testlink3
+</source>
+<p>
+In the example above, a symlink testlink3 is created in the current working directory of tasks. This symlink points to the directory that stores the unjarred contents of the uploaded jar file.
+</p>
+<p>
+Here's another example of the -cacheArchive option. Here, the input.txt file has two lines specifying the names of the two files: testlink/cache.txt and testlink/cache2.txt. "testlink" is a symlink to the archived directory, which has the files "cache.txt" and "cache2.txt".
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+                  -input "/user/me/samples/cachefile/input.txt"  \
+                  -mapper "xargs cat"  \
+                  -reducer "cat"  \
+                  -output "/user/me/samples/cachefile/out" \
+                  -cacheArchive 'hdfs://hadoop-nn1.example.com:8020/user/me/samples/cachefile/cachedir.jar#testlink' \
+                  -jobconf mapred.map.tasks=1 \
+                  -jobconf mapred.reduce.tasks=1 \
+                  -jobconf mapred.job.name="Experiment"
+
+$ ls test_jar/
+cache.txt  cache2.txt
+
+$ jar cvf cachedir.jar -C test_jar/ .
+added manifest
+adding: cache.txt(in = 30) (out= 29)(deflated 3%)
+adding: cache2.txt(in = 37) (out= 35)(deflated 5%)
+
+$ hadoop dfs -put cachedir.jar samples/cachefile
+
+$ hadoop dfs -cat /user/me/samples/cachefile/input.txt
+testlink/cache.txt
+testlink/cache2.txt
+
+$ cat test_jar/cache.txt 
+This is just the cache string
+
+$ cat test_jar/cache2.txt 
+This is just the second cache string
+
+$ hadoop dfs -ls /user/me/samples/cachefile/out      
+Found 1 items
+/user/me/samples/cachefile/out/part-00000  &lt;r 3&gt;   69
+
+$ hadoop dfs -cat /user/me/samples/cachefile/out/part-00000
+This is just the cache string   
+This is just the second cache string
+
+</source>
+</section>
+
+<section>
+<title>Specifying Additional Configuration Variables for Jobs </title>
+<p>
+You can specify additional configuration variables by using "-jobconf  &lt;n&gt;=&lt;v&gt;". For example: 
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper\
+    -reducer /bin/wc \
+    -jobconf mapred.reduce.tasks=2
+</source>
+<p>
+The -jobconf mapred.reduce.tasks=2 in the above example specifies to use two reducers for the job.
+</p>
+<p>
+For more details on the jobconf parameters see: <a href="http://wiki.apache.org/lucene-hadoop/JobConfFile">http://wiki.apache.org/lucene-hadoop/JobConfFile</a></p>
+</section>
+
+<section>
+<title>Other Supported Options </title>
+<p>
+Other options you may specify for a streaming job are described here:
+</p>
+<table>
+<tr><th>Parameter</th><th>Optional/Required </th><th>Description </th></tr>
+<tr><td> -cluster name </td><td> Optional </td><td> Switch between local Hadoop and one or more remote clusters </td></tr>
+
+<tr><td> -dfs  host:port or local </td><td> Optional </td><td> Override the DFS configuration for the job </td></tr>
+<tr><td> -jt host:port or local </td><td> Optional </td><td> Override the JobTracker configuration for the job </td></tr>
+<tr><td> -additionalconfspec specfile </td><td> Optional </td><td> Specifies a set of configuration variables in an XML file like hadoop-site.xml, instead of using multiple  options of type "-jobconf name=value" </td></tr>
+
+<tr><td> -cmdenv   name=value </td><td> Optional </td><td> Pass env var to streaming commands </td></tr>
+<tr><td> -cacheFile fileNameURI </td><td> Optional </td><td> Specify a file that has already been uploaded to the HDFS </td></tr>
+<tr><td> -cacheArchive fileNameURI </td><td> Optional </td><td> Specify a jar file that has already been uploaded to the HDFS. This jar file is unjarred automatically in the cwd of the task </td></tr>
+
+<tr><td> -inputreader JavaClassName </td><td> Optional </td><td> For backwards-compatibility: specifies a record reader class (instead of an input format class) </td></tr>
+<tr><td> -verbose </td><td> Optional </td><td> Verbose output </td></tr>
+</table>
+<p>
+To switch between "local" Hadoop and one or more remote Hadoop clusters use -cluster &lt;name&gt;.
+By default, hadoop-default.xml and hadoop-site.xml are used. The -cluster &lt;name&gt; option will cause $HADOOP_HOME/conf/hadoop-&lt;name&gt;.xml to be used instead.
+</p>
+<p>
+To change the local temp directory use:
+</p>
+<source>
+  -jobconf dfs.data.dir=/tmp
+</source>
+<p>
+To specify additional local temp directories use:
+</p>
+<source>
+   -jobconf mapred.local.dir=/tmp/local
+   -jobconf mapred.system.dir=/tmp/system
+   -jobconf mapred.temp.dir=/tmp/temp
+</source>
+<p>
+For more details on jobconf parameters see: <a href="http://wiki.apache.org/lucene-hadoop/JobConfFile">http://wiki.apache.org/lucene-hadoop/JobConfFile</a>
+</p><p>
+To set an environment variable in a streaming command use:
+</p>
+<source>
+-cmdenv EXAMPLE_DIR=/home/example/dictionaries/
+</source>
+</section>
+</section>
+
+<section>
+<title>More usage examples </title>
+
+<section>
+<title>Customizing the Way to Split Lines into Key/Value Pairs </title>
+<p>
+As noted earlier, when the map/reduce framework reads a line from the stdout of the mapper, it splits the line into a key/value pair. By default, the prefix of the line up to the first tab character is the key and the rest of the line (excluding the tab character) is the value.
+</p>
+<p>
+However, you can customize this default. You can specify a field separator other than the tab character (the default), and you can specify the nth (n >= 1) occurrence of the separator, rather than the first occurrence in a line (the default), as the boundary between the key and value. For example:
+</p>
+
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
+    -jobconf stream.map.output.field.separator=. \
+    -jobconf stream.num.map.output.key.fields=4 
+</source>
+<p>
+In the above example, "-jobconf stream.map.output.field.separator=." specifies "." as the field separator for the map outputs, and the prefix up to the fourth "." in a line will be the key and the rest of the line (excluding the fourth ".") will be the value. If a line has less than four "."s, then the whole line will be the key and the value will be an empty Text object (like the one created by new Text("")).
+</p><p>
+Similarly, you can use "-jobconf stream.reduce.output.field.separator=SEP" and "-jobconf stream.num.reduce.output.key.fields=NUM" to specify the nth field separator in a line of the reduce outputs as the separator between the key and the value.
+</p>
+</section>
+
+
+<section>
+<title>A Useful Partitioner Class (secondary sort, the -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner option) </title>
+<p>
+Hadoop has a library class, org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner, that is useful for many applications. This class allows the map/reduce framework to partition the map outputs based on prefixes of keys, not the whole keys. For example:
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
+    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
+    -jobconf stream.map.output.field.separator=. \
+    -jobconf stream.num.map.output.key.fields=4 \
+    -jobconf map.output.key.field.separator=. \
+    -jobconf num.key.fields.for.partition=2 \
+    -jobconf mapred.reduce.tasks=12
+</source>
+<p>
+Here, <em>-jobconf stream.map.output.field.separator=.</em> and <em>-jobconf stream.num.map.output.key.fields=4</em> are as explained in the previous example. The two variables are used by streaming to identify the key/value pairs of the mapper. 
+</p><p>
+The map output keys of the above map/reduce job normally have four fields separated by ".". However, the map/reduce framework will partition the map outputs by the first two fields of the keys using the <em>-jobconf num.key.fields.for.partition=2</em> option. Here, <em>-jobconf map.output.key.field.separator=.</em> specifies the separator for the partition. This guarantees that all the key/value pairs with the same first two fields in the keys will be partitioned into the same reducer.
+</p><p>
+<em>This is effectively equivalent to specifying the first two fields as the primary key and the next two fields as the secondary. The primary key is used for partitioning, and the combination of the primary and secondary keys is used for sorting.</em> A simple illustration is shown here:
+</p>
+<p>
+Output of map (the keys)</p><source>
+11.12.1.2
+11.14.2.3
+11.11.4.1
+11.12.1.1
+11.14.2.2
+
+</source>
+<p>
+Partition into 3 reducers (the first 2 fields are used as keys for partition)</p><source>
+11.11.4.1
+-----------
+11.12.1.2
+11.12.1.1
+-----------
+11.14.2.3
+11.14.2.2
+</source>
+<p>
+Sorting within each partition for the reducer (all 4 fields used for sorting)</p><source>
+11.11.4.1
+-----------
+11.12.1.1
+11.12.1.2
+-----------
+11.14.2.2
+11.14.2.3
+</source>
+</section>
+
+<section>
+<title>Working with the Hadoop Aggregate Package (the -reducer aggregate option) </title>
+<p>
+Hadoop has a library package called "Aggregate" (<a href="https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate">https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate</a>).  Aggregate provides a special reducer class and a special combiner class, and a list of simple aggregators that perform aggregations such as "sum", "max", "min" and so on  over a sequence of values. Aggregate allows you to define a mapper plugin class that is expected to generate "aggregatable items" for each input key/value pair of the mappers. The combiner/reducer will aggregate those aggregatable items by invoking the appropriate aggregators.
+</p><p>
+To use Aggregate, simply specify "-reducer aggregate":
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myAggregatorForKeyCount.py \
+    -reducer aggregate \
+    -file myAggregatorForKeyCount.py \
+    -jobconf mapred.reduce.tasks=12
+</source>
+<p>
+The python program myAggregatorForKeyCount.py looks like:
+</p>
+<source>
+#!/usr/bin/python
+
+import sys;
+
+def generateLongCountToken(id):
+    return "LongValueSum:" + id + "\t" + "1"
+
+def main(argv):
+    line = sys.stdin.readline();
+    try:
+        while line:
+            line = line&#91;:-1];
+            fields = line.split("\t");
+            print generateLongCountToken(fields&#91;0]);
+            line = sys.stdin.readline();
+    except "end of file":
+        return None
+if __name__ == "__main__":
+     main(sys.argv)
+</source>
+</section>
+
+<section>
+<title>Field Selection ( similar to unix 'cut' command) </title>
+<p>
+Hadoop has a library class, org.apache.hadoop.mapred.lib.FieldSelectionMapReduce, that effectively allows you to process text data like the unix "cut" utility. The map function defined in the class treats each input key/value pair as a list of fields. You can specify the field separator (the default is the tab character). You can select an arbitrary list of fields as the map output key, and an arbitrary list of fields as the map output value. Similarly, the reduce function defined in the class treats each input key/value pair as a list of fields. You can select an arbitrary list of fields as the reduce output key, and an arbitrary list of fields as the reduce output value. For example:
+</p>
+<source>
+$HADOOP_HOME/bin/hadoop  jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
+    -reducer org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
+    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
+    -jobconf map.output.key.field.separator=. \
+    -jobconf num.key.fields.for.partition=2 \
+    -jobconf mapred.data.field.separator=. \
+    -jobconf map.output.key.value.fields.spec=6,5,1-3:0- \
+    -jobconf reduce.output.key.value.fields.spec=0-2:5- \
+    -jobconf mapred.reduce.tasks=12
+</source>
+<p>
+The option "-jobconf map.output.key.value.fields.spec=6,5,1-3:0-" specifies key/value selection for the map outputs. Key selection spec and value selection spec are separated by ":". In this case, the map output key will consist of fields 6, 5, 1, 2, and 3. The map output value will consist of all fields (0- means field 0 and all 
+the subsequent fields). 
+</p><p>
+The option "-jobconf reduce.output.key.value.fields.spec=0-2:5-" specifies key/value selection for the reduce outputs. In this case, the reduce output key will consist of fields 0, 1, 2 (corresponding to the original fields 6, 5, 1). The reduce output value will consist of all fields starting from field 5 (corresponding to all the original fields).  
+</p>
+</section>
+</section>
+
+<section>
+<title>Frequently Asked Questions </title>
+
+<section>
+<title>How do I use Hadoop Streaming to run an arbitrary set of (semi-)independent tasks? </title>
+<p>
+Often you do not need the full power of Map Reduce, but only need to run multiple instances of the same program - either on different parts of the data, or on the same data, but with different parameters. You can use Hadoop Streaming to do this.
+</p>
+
+</section>
+
+<section>
+<title>How do I process files, one per map? </title>
+<p>
+As an example, consider the problem of zipping (compressing) a set of files across the hadoop cluster. You can achieve this using either of these methods:
+</p><ol>
+<li> Hadoop Streaming and custom mapper script:<ul>
+  <li> Generate a file containing the full DFS path of the input files. Each map task would get one file name as input.</li>
+  <li> Create a mapper script which, given a filename, will get the file to local disk, gzip the file and put it back in the desired output directory</li>
+</ul></li>
+<li>The existing Hadoop Framework:<ul>
+   <li>Add these commands to your main function:
+<source>
+       OutputFormatBase.setCompressOutput(conf, true);
+       OutputFormatBase.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.GzipCodec.class);
+       conf.setInputFormat(NonSplitableTextInputFormat.class);
+       conf.setNumReduceTasks(0);
+</source></li>
+   <li>Write your map function:
+<source>
+
+       public void map(WritableComparable key, Writable value, 
+                               OutputCollector output, 
+                               Reporter reporter) throws IOException {
+            output.collect((Text)value, null);
+       }
+</source></li>
+  <li>Note that the output filename will not be the same as the original filename</li>
+</ul></li>
+</ol>
+</section>
+
+<section>
+<title>How many reducers should I use? </title>
+<p>
+See the Hadoop Wiki for details: <a href="http://wiki.apache.org/lucene-hadoop/HowManyMapsAndReduces">http://wiki.apache.org/lucene-hadoop/HowManyMapsAndReduces</a>
+</p>
+</section>
+
+<section>
+<title>If I set up an alias in my shell script, will that work after -mapper, i.e. say I do: alias c1='cut -f1'. Will -mapper "c1" work? </title>
+<p>
+Using an alias will not work, but variable substitution is allowed as shown in this example:
+</p>
+<source>
+$ hadoop dfs -cat samples/student_marks
+alice   50
+bruce   70
+charlie 80
+dan     75
+
+$ c2='cut -f2'; $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
+    -input /user/me/samples/student_marks \
+    -mapper \"$c2\" -reducer 'cat' \
+    -output /user/me/samples/student_out \
+    -jobconf mapred.job.name='Experiment'
+
+$ hadoop dfs -ls samples/student_out
+Found 1 items
+/user/me/samples/student_out/part-00000    &lt;r 3&gt;   16
+
+$ hadoop dfs -cat samples/student_out/part-00000
+50
+70
+75
+80
+</source>
+</section>
+
+<section>
+<title>Can I use UNIX pipes? For example, will -mapper "cut -f1 | sed s/foo/bar/g" work?</title>
+<p>
+Currently this does not work and gives a "java.io.IOException: Broken pipe" error. This is probably a bug that needs to be investigated.
+</p>
+</section>
+
+<section>
+<title>When I run a streaming job by <strong>distributing large executables</strong> (for example, 3.6G) through the -file option, I get a "No space left on device" error. What do I do? </title>
+<p>
+The jar packaging happens in a directory pointed to by the configuration variable stream.tmpdir. The default value of stream.tmpdir is /tmp. Set the value to a directory with more space:
+</p>
+<source>
+-jobconf stream.tmpdir=/export/bigspace/...
+</source>
+</section>
+
+<section>
+<title>How do I specify multiple input directories? </title>
+<p>
+You can specify multiple input directories with multiple '-input' options:
+</p><source>
+ hadoop jar hadoop-streaming.jar -input '/user/foo/dir1' -input '/user/foo/dir2' 
+</source>
+</section>
+
+<section>
+<title>How do I generate output files with gzip format? </title>
+<p>
+Instead of plain text files, you can generate gzip files as your generated output. Pass '-jobconf mapred.output.compress=true -jobconf  mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec' as options to your streaming job.
+</p>
+</section>
+
+<section>
+<title>How do I provide my own input/output format with streaming? </title>
+<p>
+At least as late as version 0.14, Hadoop does not support multiple jar files. So, when specifying your own custom classes you will have to pack them along with the streaming jar and use the custom jar instead of the default hadoop streaming jar. 
+</p>
+</section>
+
+<section>
+<title>How do I parse XML documents using streaming? </title>
+<p>
+You can use the record reader StreamXmlRecordReader to process XML documents. 
+</p>
+<source>
+hadoop jar hadoop-streaming.jar -inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" ..... (rest of the command)
+</source>
+<p>
+Anything found between BEGIN_STRING and END_STRING would be treated as one record for map tasks.
+</p>
+</section>
+</section>
+</body>
+</document>

+ 2 - 1
src/docs/src/documentation/content/xdocs/tabs.xml

@@ -14,7 +14,8 @@
     directory (ends in '/'), in which case /index.html will be added
     directory (ends in '/'), in which case /index.html will be added
   -->
   -->
 
 
-  <tab label="Main" dir="" />  
+  <tab label="Project" href="http://lucene.apache.org/hadoop/" />
   <tab label="Wiki" href="http://wiki.apache.org/lucene-hadoop" />
   <tab label="Wiki" href="http://wiki.apache.org/lucene-hadoop" />
+  <tab label="Hadoop 0.15 Documentation" dir="" />  
   
   
 </tabs>
 </tabs>

+ 0 - 55
src/docs/src/documentation/content/xdocs/version_control.xml

@@ -1,55 +0,0 @@
-<?xml version="1.0"?>
-
-<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
-
-<document>
-  
-  <header>
-    <title>Hadoop Version Control System</title>
-  </header>
-  
-  <body>
-  
-    <section>
-      <title>Overview</title>
-      <p>
-        The Hadoop source code resides in the Apache <a href="http://subversion.tigris.org/">Subversion (SVN)</a> repository.
-        The command-line SVN client can be obtained <a href="http://subversion.tigris.org/project_packages.html">here</a>.
-        The TortoiseSVN GUI client for Windows can be obtained <a href="http://tortoisesvn.tigris.org/">here</a>. There
-        are also SVN plugins available for both <a href="http://subclipse.tigris.org/">Eclipse</a> and 
-        <a href="http://svnup.tigris.org/">IntelliJ IDEA</a>.
-      </p>
-    </section>
-    
-    <section>
-      <title>Web Access (read-only)</title>
-      <p>
-        The source code can be browsed via the Web at 
-        <a href="http://svn.apache.org/viewcvs.cgi/lucene/hadoop/">http://svn.apache.org/viewcvs.cgi/lucene/hadoop/</a>.
-        No SVN client software is required.
-      </p>
-    </section>
-    
-    <section>
-      <title>Anonymous Access (read-only)</title>
-      <p>
-        The SVN URL for anonymous users is 
-        <a href="http://svn.apache.org/repos/asf/lucene/hadoop/">http://svn.apache.org/repos/asf/lucene/hadoop/</a>.
-        Instructions for anonymous SVN access are 
-        <a href="http://www.apache.org/dev/version-control.html#anon-svn">here</a>.
-      </p>
-    </section>
-    
-    <section>
-      <title>Committer Access (read-write)</title>
-      <p>
-        The SVN URL for committers is 
-        <a href="https://svn.apache.org/repos/asf/lucene/hadoop/">https://svn.apache.org/repos/asf/lucene/hadoop/</a>.
-        Instructions for committer SVN access are 
-        <a href="http://www.apache.org/dev/version-control.html#https-svn">here</a>.
-      </p>
-    </section>
-    
-  </body>
-  
-</document>

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است