Explorar el Código

MAPREDUCE-2639. Bug fixes in speculate.DataStatistics. Contributed by Josh Wills.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/MR-279@1157343 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy hace 13 años
padre
commit
64e9c16789

+ 3 - 0
mapreduce/CHANGES.txt

@@ -4,6 +4,9 @@ Trunk (unreleased changes)
 
   MAPREDUCE-279
 
+    MAPREDUCE-2639. Bug fixes in speculate.DataStatistics. (Josh Wills via
+    acmurthy)
+
     Ensure NM uses the right LocalDirAllocator.getLocalPathForWrite call which
     doesn't interfere with disk-fail-in-place. (acmurthy)
 

+ 6 - 10
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DataStatistics.java

@@ -2,7 +2,6 @@
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
@@ -38,15 +37,9 @@ public class DataStatistics {
     this.sumSquares += newNum * newNum;
   }
 
-  public void updateStatistics(double old, double update) {
-    sub(old);
-    add(update);
-  }
-
-  private synchronized void sub(double oldNum) {
-    this.count--;
-    this.sum = Math.max(this.sum - oldNum, 0.0d);
-    this.sumSquares = Math.max(this.sumSquares - oldNum * oldNum, 0.0d);
+  public synchronized void updateStatistics(double old, double update) {
+	this.sum += update - old;
+	this.sumSquares += (update * update) - (old * old);
   }
 
   public synchronized double mean() {
@@ -55,6 +48,9 @@ public class DataStatistics {
 
   public synchronized double var() {
     // E(X^2) - E(X)^2
+    if (count <= 1) {
+      return 0.0;
+    }
     double mean = mean();
     return Math.max((sumSquares/count) - mean * mean, 0.0d);
   }

+ 73 - 0
mapreduce/mr-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/speculate/TestDataStatistics.java

@@ -0,0 +1,73 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.mapreduce.v2.app.speculate;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestDataStatistics {
+
+  private static final double TOL = 0.001;
+
+  @Test
+  public void testEmptyDataStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics();
+    Assert.assertEquals(0, statistics.count(), TOL);
+    Assert.assertEquals(0, statistics.mean(), TOL);
+    Assert.assertEquals(0, statistics.var(), TOL);
+    Assert.assertEquals(0, statistics.std(), TOL);
+    Assert.assertEquals(0, statistics.outlier(1.0f), TOL);
+  }
+  
+  @Test
+  public void testSingleEntryDataStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics(17.29);
+    Assert.assertEquals(1, statistics.count(), TOL);
+    Assert.assertEquals(17.29, statistics.mean(), TOL);
+    Assert.assertEquals(0, statistics.var(), TOL);
+    Assert.assertEquals(0, statistics.std(), TOL);
+    Assert.assertEquals(17.29, statistics.outlier(1.0f), TOL);
+  }
+  
+  @Test
+  public void testMutiEntryDataStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics();
+    statistics.add(17);
+    statistics.add(29);
+    Assert.assertEquals(2, statistics.count(), TOL);
+    Assert.assertEquals(23.0, statistics.mean(), TOL);
+    Assert.assertEquals(36.0, statistics.var(), TOL);
+    Assert.assertEquals(6.0, statistics.std(), TOL);
+    Assert.assertEquals(29.0, statistics.outlier(1.0f), TOL);
+ }
+  
+  @Test
+  public void testUpdateStatistics() throws Exception {
+    DataStatistics statistics = new DataStatistics(17);
+    statistics.add(29);
+    Assert.assertEquals(2, statistics.count(), TOL);
+    Assert.assertEquals(23.0, statistics.mean(), TOL);
+    Assert.assertEquals(36.0, statistics.var(), TOL);
+
+    statistics.updateStatistics(17, 29);
+    Assert.assertEquals(2, statistics.count(), TOL);
+    Assert.assertEquals(29.0, statistics.mean(), TOL);
+    Assert.assertEquals(0.0, statistics.var(), TOL);
+  }
+}