Browse Source

MAPREDUCE-3166. [Rumen] Make Rumen use job history api instead of relying on current history file name format. (Ravi Gummadi via amarrk)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1195540 13f79535-47bb-0310-9956-ffa450edef68
Amar Kamat 13 năm trước cách đây
mục cha
commit
29ba7087a6

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -1,4 +1,7 @@
 Hadoop MapReduce Change Log
+    MAPREDUCE-3166. [Rumen] Make Rumen use job history api instead of relying
+    on current history file name format. (Ravi Gummadi via amarrk)
+
     MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file
                     names also. (Ravi Gummadi via amarrk)
 

+ 2 - 0
hadoop-mapreduce-project/ivy.xml

@@ -87,6 +87,8 @@
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" 
                rev="${yarn.version}" conf="compile->default"/>
+   <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common" 
+               rev="${yarn.version}" conf="compile->default"/>
    <dependency org="org.apache.hadoop" name="hadoop-yarn-common"
                rev="${yarn.version}" conf="compile->default"/>
    <dependency org="log4j" name="log4j" rev="${log4j.version}" 

+ 3 - 2
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java

@@ -310,7 +310,7 @@ public class TestRumenJobTraces {
   public void testJobHistoryFilenameParsing() throws IOException {
     final Configuration conf = new Configuration();
     final FileSystem lfs = FileSystem.getLocal(conf);
-    String user = "testUser";
+
     org.apache.hadoop.mapred.JobID jid = 
       new org.apache.hadoop.mapred.JobID("12345", 1);
     final Path rootInputDir =
@@ -318,7 +318,8 @@ public class TestRumenJobTraces {
             .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
     
     // Check if current jobhistory filenames are detected properly
-    Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user);
+    Path jhFilename = org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils
+        .getStagingJobHistoryFile(rootInputDir, jid.toString(), 1);
     validateHistoryFileNameParsing(jhFilename, jid);
 
     // Check if Pre21 V1 jobhistory file names are detected properly

+ 35 - 4
hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.tools.rumen;
 
+import java.io.IOException;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -56,6 +57,30 @@ public class JobHistoryUtils {
     return jobId;
   }
 
+  /**
+   * Extracts job id from the current hadoop version's job history file name.
+   * The name is first checked with the MRv2 JobHistoryUtils
+   * isValidJobHistoryFileName(); only if that check passes is the job id
+   * parsed using getJobIDFromHistoryFilePath(). An IOException thrown while
+   * parsing is deliberately ignored so that the caller can fall back to the
+   * older history file name formats.
+   * @param fileName job history file name from which job id is to be extracted
+   * @return job id if the history file name format is same as that of the
+   * current hadoop version. Returns null otherwise.
+   */
+  private static String extractJobIDFromCurrentHistoryFile(String fileName) {
+    JobID id = null;
+    if (org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils
+            .isValidJobHistoryFileName(fileName)) {
+      try {
+        id = org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils
+                 .getJobIDFromHistoryFilePath(fileName);
+      } catch (IOException e) {
+        // Ignore this exception and go ahead with getting of jobID assuming
+        // older hadoop version's history file
+      }
+    }
+    if (id != null) {
+      return id.toString();
+    }
+    return null;
+  }
+
   /**
    * Extracts jobID string from the given job history file name.
    * @param fileName name of the job history file
@@ -67,16 +92,22 @@ public class JobHistoryUtils {
     // (1) old pre21 job history file name format
     // (2) new pre21 job history file name format
     // (3) current job history file name format i.e. 0.22
+
+    // Try to get the jobID assuming that the history file is from the current
+    // hadoop version
+    String jobID = extractJobIDFromCurrentHistoryFile(fileName);
+    if (jobID != null) {
+      return jobID;//history file is of current hadoop version
+    }
+
+    // History file could be of older hadoop versions
     String pre21JobID = applyParser(fileName,
         Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1);
     if (pre21JobID == null) {
       pre21JobID = applyParser(fileName,
           Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2);
     }
-    if (pre21JobID != null) {
-      return pre21JobID;
-    }
-    return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
+    return pre21JobID;
   }
 
   /**