Ver código fonte

HDFS-4819. [Dynamometer] Fix parsing of audit logs which contain = in path names. Contributed by Soya Miyoshi.

Erik Krogen 5 anos atrás
pai
commit
ae42c8cb61

+ 15 - 4
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java

@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.TimeZone;
 import java.util.function.Function;
@@ -81,8 +82,6 @@ public class AuditLogDirectParser implements AuditCommandParser {
   public static final String AUDIT_LOG_PARSE_REGEX_DEFAULT =
       "^(?<timestamp>.+?) INFO [^:]+: (?<message>.+)$";
 
-  private static final Splitter.MapSplitter AUDIT_SPLITTER = Splitter.on("\t")
-      .trimResults().omitEmptyStrings().withKeyValueSeparator("=");
   private static final Splitter SPACE_SPLITTER = Splitter.on(" ").trimResults()
       .omitEmptyStrings();
 
@@ -132,8 +131,20 @@ public class AuditLogDirectParser implements AuditCommandParser {
     // Sanitize the = in the rename options field into a : so we can split on =
     String auditMessageSanitized =
         m.group("message").replace("(options=", "(options:");
-    Map<String, String> parameterMap = AUDIT_SPLITTER
-        .split(auditMessageSanitized);
+
+    Map<String, String> parameterMap = new HashMap<String, String>();
+    String[] auditMessageSanitizedList = auditMessageSanitized.split("\t");
+
+    for (String auditMessage : auditMessageSanitizedList) {
+      String[] splitMessage = auditMessage.split("=", 2);
+      try {
+        parameterMap.put(splitMessage[0], splitMessage[1]);
+      } catch (ArrayIndexOutOfBoundsException e) {
+        throw new IOException(
+            "Exception while parsing a message from audit log", e);
+      }
+    }
+
     return new AuditReplayCommand(relativeToAbsolute.apply(relativeTimestamp),
         // Split the UGI on space to remove the auth and proxy portions of it
         SPACE_SPLITTER.split(parameterMap.get("ugi")).iterator().next(),

+ 9 - 0
hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/test/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/TestAuditLogDirectParser.java

@@ -58,6 +58,15 @@ public class TestAuditLogDirectParser {
     assertEquals(expected, parser.parse(in, Function.identity()));
   }
 
+  @Test
+  public void testInputWithEquals() throws Exception {
+    Text in = getAuditString("1970-01-01 00:00:11,000", "fakeUser",
+            "listStatus", "day=1970", "null");
+    AuditReplayCommand expected = new AuditReplayCommand(1000, "fakeUser",
+            "listStatus", "day=1970", "null", "0.0.0.0");
+    assertEquals(expected, parser.parse(in, Function.identity()));
+  }
+
   @Test
   public void testInputWithRenameOptions() throws Exception {
     Text in = getAuditString("1970-01-01 00:00:11,000", "fakeUser",