Преглед изворног кода

AMBARI-22846. Get rid of Murmur Hash usage in Log Feeder for log events

Oliver Szabo пре 8 година
родитељ
комит
38c7c1efa2

+ 5 - 4
ambari-logsearch/ambari-logsearch-logfeeder/src/main/java/org/apache/ambari/logfeeder/output/OutputManagerImpl.java

@@ -19,6 +19,7 @@
 package org.apache.ambari.logfeeder.output;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.hash.Hashing;
 import org.apache.ambari.logfeeder.common.LogFeederConstants;
 import org.apache.ambari.logfeeder.conf.LogFeederProps;
 import org.apache.ambari.logfeeder.loglevelfilter.LogLevelFilterHandler;
@@ -29,7 +30,6 @@ import org.apache.ambari.logfeeder.plugin.input.InputMarker;
 import org.apache.ambari.logfeeder.plugin.manager.OutputManager;
 import org.apache.ambari.logfeeder.plugin.output.Output;
 import org.apache.ambari.logfeeder.util.LogFeederUtil;
-import org.apache.ambari.logfeeder.util.MurmurHash;
 import org.apache.ambari.logsearch.config.api.OutputConfigMonitor;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.log4j.Level;
@@ -46,7 +46,6 @@ import java.util.UUID;
 public class OutputManagerImpl extends OutputManager {
   private static final Logger LOG = Logger.getLogger(OutputManagerImpl.class);
 
-  private static final int HASH_SEED = 31174077;
   private static final int MAX_OUTPUT_SIZE = 32765; // 32766-1
 
   private List<Output> outputs = new ArrayList<Output>();
@@ -132,7 +131,9 @@ public class OutputManagerImpl extends OutputManager {
         }
       }
 
-      Long eventMD5 = MurmurHash.hash64A(LogFeederUtil.getGson().toJson(jsonObj).getBytes(), HASH_SEED);
+
+      byte[] bytes = LogFeederUtil.getGson().toJson(jsonObj).getBytes();
+      Long eventMD5 = Hashing.md5().hashBytes(bytes).asLong();
       if (input.isGenEventMD5()) {
         jsonObj.put("event_md5", prefix + eventMD5.toString());
       }
@@ -157,7 +158,7 @@ public class OutputManagerImpl extends OutputManager {
       String logMessage = (String) jsonObj.get("log_message");
       logMessage = truncateLongLogMessage(jsonObj, input, logMessage);
       if (addMessageMD5) {
-        jsonObj.put("message_md5", "" + MurmurHash.hash64A(logMessage.getBytes(), 31174077));
+        jsonObj.put("message_md5", "" + Hashing.md5().hashBytes(logMessage.getBytes()).asLong());
       }
     }
     if (logLevelFilterHandler.isAllowed(jsonObj, inputMarker)

+ 0 - 163
ambari-logsearch/ambari-logsearch-logfeeder/src/main/java/org/apache/ambari/logfeeder/util/MurmurHash.java

@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.ambari.logfeeder.util;
-
-import com.google.common.primitives.Ints;
-
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-
-/**
- * This is a very fast, non-cryptographic hash suitable for general hash-based
- * lookup.  See http://murmurhash.googlepages.com/ for more details.
- * <p/>
- * <p>The C version of MurmurHash 2.0 found at that site was ported
- * to Java by Andrzej Bialecki (ab at getopt org).</p>
- */
-public final class MurmurHash {
-
-  private MurmurHash() {
-  }
-
-  /**
-   * Hashes an int.
-   *
-   * @param data The int to hash.
-   * @param seed The seed for the hash.
-   * @return The 32 bit hash of the bytes in question.
-   */
-  public static int hash(int data, int seed) {
-    return hash(ByteBuffer.wrap(Ints.toByteArray(data)), seed);
-  }
-
-  /**
-   * Hashes bytes in an array.
-   *
-   * @param data The bytes to hash.
-   * @param seed The seed for the hash.
-   * @return The 32 bit hash of the bytes in question.
-   */
-  public static int hash(byte[] data, int seed) {
-    return hash(ByteBuffer.wrap(data), seed);
-  }
-
-  /**
-   * Hashes bytes in part of an array.
-   *
-   * @param data   The data to hash.
-   * @param offset Where to start munging.
-   * @param length How many bytes to process.
-   * @param seed   The seed to start with.
-   * @return The 32-bit hash of the data in question.
-   */
-  public static int hash(byte[] data, int offset, int length, int seed) {
-    return hash(ByteBuffer.wrap(data, offset, length), seed);
-  }
-
-  /**
-   * Hashes the bytes in a buffer from the current position to the limit.
-   *
-   * @param buf  The bytes to hash.
-   * @param seed The seed for the hash.
-   * @return The 32 bit murmur hash of the bytes in the buffer.
-   */
-  public static int hash(ByteBuffer buf, int seed) {
-    // save byte order for later restoration
-    ByteOrder byteOrder = buf.order();
-    buf.order(ByteOrder.LITTLE_ENDIAN);
-
-    int m = 0x5bd1e995;
-    int r = 24;
-
-    int h = seed ^ buf.remaining();
-
-    while (buf.remaining() >= 4) {
-      int k = buf.getInt();
-
-      k *= m;
-      k ^= k >>> r;
-      k *= m;
-
-      h *= m;
-      h ^= k;
-    }
-
-    if (buf.remaining() > 0) {
-      ByteBuffer finish = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN);
-      // for big-endian version, use this first:
-      // finish.position(4-buf.remaining());
-      finish.put(buf).rewind();
-      h ^= finish.getInt();
-      h *= m;
-    }
-
-    h ^= h >>> 13;
-    h *= m;
-    h ^= h >>> 15;
-
-    buf.order(byteOrder);
-    return h;
-  }
-
-
-  public static long hash64A(byte[] data, int seed) {
-    return hash64A(ByteBuffer.wrap(data), seed);
-  }
-
-  public static long hash64A(byte[] data, int offset, int length, int seed) {
-    return hash64A(ByteBuffer.wrap(data, offset, length), seed);
-  }
-
-  public static long hash64A(ByteBuffer buf, int seed) {
-    ByteOrder byteOrder = buf.order();
-    buf.order(ByteOrder.LITTLE_ENDIAN);
-
-    long m = 0xc6a4a7935bd1e995L;
-    int r = 47;
-
-    long h = seed ^ (buf.remaining() * m);
-
-    while (buf.remaining() >= 8) {
-      long k = buf.getLong();
-
-      k *= m;
-      k ^= k >>> r;
-      k *= m;
-
-      h ^= k;
-      h *= m;
-    }
-
-    if (buf.remaining() > 0) {
-      ByteBuffer finish = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
-      // for big-endian version, do this first:
-      // finish.position(8-buf.remaining());
-      finish.put(buf).rewind();
-      h ^= finish.getLong();
-      h *= m;
-    }
-
-    h ^= h >>> r;
-    h *= m;
-    h ^= h >>> r;
-
-    buf.order(byteOrder);
-    return h;
-  }
-
-}