Просмотр исходного кода

HADOOP-3743. Fix -libjars, -files, -archives options. (Amareshwari Sriramadasu via mahadev)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@677690 13f79535-47bb-0310-9956-ffa450edef68
Mahadev Konar 17 лет назад
Родитель
Сommit
f0a5cb3594

+ 3 - 0
CHANGES.txt

@@ -862,6 +862,9 @@ Release 0.18.0 - Unreleased
     required. (Amareshwari Sriramadasu via ddas)
 
     HADOOP-3755. Update gridmix to work with HOD 0.4 (Runping Qi via cdouglas)
+  
+    HADOOP-3743. Fix -libjars, -files, -archives options to work even if
+    user code does not implement Tool. (Amareshwari Sriramadasu via mahadev)
 
 Release 0.17.2 - Unreleased
 

+ 1 - 1
bin/hadoop

@@ -235,7 +235,7 @@ elif [ "$COMMAND" = "version" ] ; then
   CLASS=org.apache.hadoop.util.VersionInfo
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
 elif [ "$COMMAND" = "jar" ] ; then
-  CLASS=org.apache.hadoop.util.RunJar
+  CLASS=org.apache.hadoop.mapred.JobShell
 elif [ "$COMMAND" = "distcp" ] ; then
   CLASS=org.apache.hadoop.tools.DistCp
   CLASSPATH=${CLASSPATH}:${TOOL_PATH}

+ 1 - 0
src/core/org/apache/hadoop/util/GenericOptionsParser.java

@@ -247,6 +247,7 @@ public class GenericOptionsParser {
           conf.set(property[i], property[i+1]);
       }
     }
+    conf.setBoolean("mapred.used.genericoptionsparser", true);
   }
 
   /**

+ 5 - 1
src/docs/src/documentation/content/xdocs/commands_manual.xml

@@ -57,7 +57,11 @@
 				<p>
 				  Following are supported by <a href="commands_manual.html#dfsadmin">dfsadmin</a>, 
 				  <a href="commands_manual.html#fs">fs</a>, <a href="commands_manual.html#fsck">fsck</a> and 
-				  <a href="commands_manual.html#job">job</a>.
+				  <a href="commands_manual.html#job">job</a>. 
+				  Applications should implement 
+				  <a href="ext:api/org/apache/hadoop/util/tool">Tool</a> to support
+				  <a href="ext:api/org/apache/hadoop/util/genericoptionsparser">
+				  GenericOptions</a>.
 				</p>
 			     <table>
 			          <tr><th> GENERIC_OPTION </th><th> Description </th></tr>

+ 16 - 0
src/docs/src/documentation/content/xdocs/mapred_tutorial.xml

@@ -602,6 +602,22 @@
           <code>Hello    2</code><br/>
           <code>World    2</code><br/>
         </p>
+        
+        <p> Applications can specify a comma separated list of paths which
+        would be present in the current working directory of the task 
+        using the option <code>-files</code>. The <code>-libjars</code>
+        option allows applications to add jars to the classpaths of the maps
+        and reduces. The <code>-archives</code> option allows them to pass
+        archives as arguments; these are unzipped/unjarred and a link with the
+        name of the jar/zip is created in the current working directory of
+        tasks. More details about the command line options are available in the
+        <a href="commands_manual.html">Commands manual</a>.</p>
+        
+        <p>Running <code>wordcount</code> example with 
+        <code>-libjars</code> and <code>-files</code>:<br/>
+        <code> hadoop jar hadoop-examples.jar wordcount -files cachefile.txt 
+        -libjars mylib.jar input output </code> 
+        </p>
       </section>
       
       <section>

+ 56 - 0
src/mapred/org/apache/hadoop/mapred/JobClient.java

@@ -153,6 +153,7 @@ public class JobClient extends Configured implements MRConstants, Tool  {
   private static final Log LOG = LogFactory.getLog("org.apache.hadoop.mapred.JobClient");
   public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
   private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED; 
+  private static Configuration commandLineConfig;
   static long MAX_JOBPROFILE_AGE = 1000 * 2;
 
   /**
@@ -359,7 +360,25 @@ public class JobClient extends Configured implements MRConstants, Tool  {
     setConf(conf);
     init(conf);
   }
+
+  /**
+   * Set the command line config in the JobClient. These are
+   * parameters passed from the command line and stored in
+   * conf. The reference is kept in a static field of this class.
+   * @param conf the configuration object to set.
+   */
+  static synchronized void  setCommandLineConfig(Configuration conf) {
+    commandLineConfig = conf;
+  }
   
+  /**
+   * @return the command line configuration stored in this class (null if none was set)
+   */
+  public static synchronized Configuration getCommandLineConfig() {
+    return commandLineConfig;
+  }
+  
+ 
   /**
    * Connect to the default {@link JobTracker}.
    * @param conf the job configuration.
@@ -484,14 +503,51 @@ public class JobClient extends Configured implements MRConstants, Tool  {
    */
   private void configureCommandLineOptions(JobConf job, Path submitJobDir, Path submitJarFile) 
     throws IOException {
+    
+    final String warning = "Use genericOptions for the option ";
+
+    if (!(job.getBoolean("mapred.used.genericoptionsparser", false))) {
+      LOG.warn("Use GenericOptionsParser for parsing the arguments. " +
+               "Applications should implement Tool for the same.");
+    }
+
     // get all the command line arguments into the 
     // jobconf passed in by the user conf
+    Configuration commandConf = JobClient.getCommandLineConfig();
     String files = null;
     String libjars = null;
     String archives = null;
+
     files = job.get("tmpfiles");
+    if (files == null) {
+      if (commandConf != null) {
+        files = commandConf.get("tmpfiles");
+        if (files != null) {
+          LOG.warn(warning + "-files");
+        }
+      }
+    }
+
     libjars = job.get("tmpjars");
+    if (libjars == null) {
+      if (commandConf != null) {
+        libjars = commandConf.get("tmpjars");
+        if (libjars != null) {
+          LOG.warn(warning + "-libjars");
+        }
+      }
+    }
+
     archives = job.get("tmparchives");
+    if (archives == null) {
+      if (commandConf != null) {
+        archives = commandConf.get("tmparchives");
+        if (archives != null) {
+          LOG.warn(warning + "-archives");
+        }
+      }
+    }
+    
     /*
      * set this user's id in job configuration, so later job files can be
      * accessed using this user's id

+ 70 - 0
src/mapred/org/apache/hadoop/mapred/JobShell.java

@@ -0,0 +1,70 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.RunJar;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Provides command line parsing for job submission.
+ * A job submission looks like:
+ *  hadoop jar -libjars <comma separated jars> -archives <comma separated archives>
+ *  -files <comma separated files> inputjar args
+ */
+public class JobShell extends Configured implements Tool {
+  public JobShell() {
+    this(null);
+  }
+
+  public JobShell(Configuration conf) {
+    super(conf);
+  }
+
+  protected void init() throws IOException {
+    getConf().setQuietMode(false);
+  }
+
+  /**
+   * Publishes this tool's configuration through
+   * JobClient.setCommandLineConfig and then hands the arguments to
+   * RunJar.main.
+   *
+   * @param argv arguments passed on unchanged to RunJar.main
+   * @return 0 if RunJar.main returned normally, -1 if anything was thrown
+   */
+  public int run(String argv[]) throws Exception {
+    try {
+      JobClient.setCommandLineConfig(getConf());
+      RunJar.main(argv);
+      return 0;
+    } catch (Throwable th) {
+      // Anything thrown while running the jar is reported on stderr and
+      // turned into a failure code instead of propagating to the caller.
+      System.err.println(StringUtils.stringifyException(th));
+      return -1;
+    }
+  }
+
+  /**
+   * Runs the shell through ToolRunner. A non-zero result is passed on as
+   * the process exit status so that a failed run is visible to the caller;
+   * on success the method simply returns.
+   */
+  public static void main(String[] argv) throws Exception {
+    int res = ToolRunner.run(new JobShell(), argv);
+    if (res != 0) {
+      System.exit(res);
+    }
+  }
+}

+ 79 - 0
src/test/org/apache/hadoop/mapred/TestJobShell.java

@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import java.io.File;
+import java.io.FileOutputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+/**
+ * Runs a job through JobShell with -files and -libjars on mini DFS and
+ * MR clusters and checks that the shell does not report failure.
+ */
+public class TestJobShell extends TestCase {
+  // Input and output paths for the job. They are dummies: the job is
+  // only there to exercise the command line parameters, not map reduce.
+  static final Path input = new Path("/test/input/");
+  static final Path output = new Path("/test/output");
+  File buildDir = new File(System.getProperty("test.build.data", "/tmp"));
+
+  public void testJobShell() throws Exception {
+    MiniDFSCluster dfs = null;
+    MiniMRCluster mr = null;
+    FileSystem fs = null;
+    Path testFile = new Path(input, "testfile");
+    File thisbuildDir = new File(buildDir, "jobCommand");
+    File f = new File(thisbuildDir, "files_tmp");
+    try {
+      Configuration conf = new Configuration();
+      //start the mini mr and dfs cluster.
+      dfs = new MiniDFSCluster(conf, 2 , true, null);
+      fs = dfs.getFileSystem();
+      FSDataOutputStream stream = fs.create(testFile);
+      stream.write("teststring".getBytes());
+      stream.close();
+      mr = new MiniMRCluster(2, fs.getUri().toString(), 1);
+      JobConf jconf = mr.createJobConf();
+      JobShell jshell = new JobShell();
+      // mkdirs() returns false when the directory is already there (for
+      // example left over from an earlier run), which is fine here.
+      assertTrue("create build dir",
+                 thisbuildDir.mkdirs() || thisbuildDir.isDirectory());
+      FileOutputStream fstream = new FileOutputStream(f);
+      try {
+        fstream.write("somestrings".getBytes());
+      } finally {
+        fstream.close();
+      }
+      String[] args = new String[8];
+      args[0] = "-files";
+      args[1] = f.toString();
+      args[2] = "-libjars";
+      // reuse testjob.jar as the -libjars jar
+      // rather than building a dedicated one
+      args[3] = "build/test/testjar/testjob.jar";
+      args[4] = "build/test/testshell/testshell.jar";
+      args[5] = "testshell.ExternalMapReduce";
+      args[6] = input.toString();
+      args[7] = output.toString();
+      int ret = ToolRunner.run(jconf, jshell, args);
+      assertTrue("not failed ", ret != -1);
+    } finally {
+      // Remove the local files even when the test fails, so that a
+      // rerun starts from a clean directory.
+      f.delete();
+      thisbuildDir.delete();
+      // The MR cluster was started on top of the DFS, so stop it first.
+      if (mr != null) {mr.shutdown();}
+      if (dfs != null) {dfs.shutdown();}
+    }
+  }
+}

+ 8 - 3
src/test/testshell/ExternalMapReduce.java

@@ -115,8 +115,13 @@ public class ExternalMapReduce extends Configured implements Tool {
   }
   
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(new Configuration(),
-                             new ExternalMapReduce(), args);
-    System.exit(res);
+    // Start from the configuration published through
+    // JobClient.setCommandLineConfig when one is available,
+    // and from a default Configuration otherwise.
+    Configuration shellConf = JobClient.getCommandLineConfig();
+    Configuration toolConf = (shellConf == null)
+        ? new Configuration()
+        : new Configuration(shellConf);
+    ToolRunner.run(toolConf, new ExternalMapReduce(), args);
   }
 }