
HDFS-719. Add 6 fault injection tests for pipeline close to simulate slow datanodes and disk errors.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/trunk@828179 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze, 15 years ago
parent commit 589d46962c

+ 4 - 1
CHANGES.txt

@@ -295,7 +295,10 @@ Release 0.21.0 - Unreleased
     (cos)
 
     HDFS-716. Define a pointcut for pipeline close and add a few fault
-    injection tests.  (szetszwo)
+    injection tests to simulate an out-of-memory problem.  (szetszwo)
+
+    HDFS-719. Add 6 fault injection tests for pipeline close to simulate slow
+    datanodes and disk errors.  (szetszwo)
 
   BUG FIXES
 

+ 35 - 0
src/test/aop/org/apache/hadoop/fi/DataTransferTestUtil.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.fi;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -65,9 +66,15 @@ public class DataTransferTestUtil {
     /** Simulate action for the statusRead pointcut */
     public final ActionContainer<DatanodeID> fiStatusRead
         = new ActionContainer<DatanodeID>();
+    /** Simulate action for the pipelineAck pointcut */
+    public final ActionContainer<DatanodeID> fiPipelineAck
+        = new ActionContainer<DatanodeID>();
     /** Simulate action for the pipelineClose pointcut */
     public final ActionContainer<DatanodeID> fiPipelineClose
         = new ActionContainer<DatanodeID>();
+    /** Simulate action for the blockFileClose pointcut */
+    public final ActionContainer<DatanodeID> fiBlockFileClose
+        = new ActionContainer<DatanodeID>();
 
     /** Verification action for the pipelineInitNonAppend pointcut */
     public final ActionContainer<Integer> fiPipelineInitErrorNonAppend
@@ -178,6 +185,34 @@ public class DataTransferTestUtil {
     }
   }
 
+  /** Throws an IOException. */
+  public static class IoeAction extends DataNodeAction {
+    private final String error;
+
+    /** Create an action for datanode i in the pipeline. */
+    public IoeAction(String currentTest, int i, String error) {
+      super(currentTest, i);
+      this.error = error;
+    }
+
+    @Override
+    public void run(DatanodeID id) throws IOException {
+      final DataTransferTest test = getDataTransferTest();
+      final Pipeline p = test.getPipeline(id);
+      if (p.contains(index, id)) {
+        final String s = toString(id);
+        FiTestUtil.LOG.info(s);
+        throw new IOException(s);
+      }
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public String toString() {
+      return error + " " + super.toString();
+    }
+  }
+
   /**
    * Sleep some period of time so that it slows down the datanode
   * or sleep forever so that the datanode becomes unresponsive.

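The new IoeAction plugs into the same ActionContainer mechanism as the existing actions. A minimal wiring sketch, assuming the initTest() and set() calls shown in this commit (the helper name and the datanode index below are hypothetical; the pattern mirrors runBlockFileCloseTest later in this commit):

    import org.apache.hadoop.fi.DataTransferTestUtil;
    import org.apache.hadoop.fi.DataTransferTestUtil.DataTransferTest;
    import org.apache.hadoop.fi.DataTransferTestUtil.IoeAction;

    // Hypothetical helper: make datanode 1 in the pipeline throw a
    // "DISK ERROR" IOException when the blockFileClose pointcut fires.
    static void injectDiskErrorOnBlockFileClose() {
      final DataTransferTest t =
          (DataTransferTest) DataTransferTestUtil.initTest();
      t.fiBlockFileClose.set(
          new IoeAction("injectDiskErrorOnBlockFileClose", 1, "DISK ERROR"));
      // A subsequent HDFS write should now fail when DN1 closes its block file.
    }
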
+ 13 - 1
src/test/aop/org/apache/hadoop/fi/FiTestUtil.java

@@ -40,7 +40,19 @@ public class FiTestUtil {
   };
 
   /**
-   * Return a random integer uniformly distributed in the interval [min,max).
+   * Return a random integer uniformly distributed over the interval [min,max).
+   */
+  public static int nextRandomInt(final int min, final int max) {
+    final int d = max - min;
+    if (d <= 0) {
+      throw new IllegalArgumentException("d <= 0, min=" + min + ", max=" + max);
+    }
+    return d == 1? min: min + RANDOM.get().nextInt(d);
+  }
+
+  /**
+   * Return a random integer of type long,
+   * uniformly distributed over the interval [min,max).
    * Assume max - min <= Integer.MAX_VALUE.
    */
   public static long nextRandomLong(final long min, final long max) {

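Note that the interval is half-open, so max itself is never returned. A quick illustration of the new nextRandomInt (the comments list the possible outcomes, not a fixed result):

    // nextRandomInt(3, 7) yields one of 3, 4, 5, 6 -- never 7;
    // nextRandomInt(5, 6) always yields 5 (the d == 1 shortcut above);
    // nextRandomInt(5, 5) throws IllegalArgumentException since max - min <= 0.
    final int r = FiTestUtil.nextRandomInt(3, 7);
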
+ 9 - 1
src/test/aop/org/apache/hadoop/hdfs/DFSClientAspects.aj

@@ -23,8 +23,8 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fi.DataTransferTestUtil;
 import org.apache.hadoop.fi.DataTransferTestUtil.DataTransferTest;
+import org.apache.hadoop.hdfs.DFSClient.DFSOutputStream;
 import org.apache.hadoop.hdfs.DFSClient.DFSOutputStream.DataStreamer;
-
 import org.junit.Assert;
 
 /** Aspects for DFSClient */
@@ -85,4 +85,12 @@ public aspect DFSClientAspects {
     }
   }
 
+  pointcut pipelineClose(DFSOutputStream out):
+    call(void flushInternal())
+    && withincode (void DFSOutputStream.close())
+    && this(out);
+
+  before(DFSOutputStream out) : pipelineClose(out) {
+    LOG.info("FI: before pipelineClose:");
+  }
 }

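The new pipelineClose pointcut combines call, withincode and this, so the advice fires only when flushInternal() is invoked from inside DFSOutputStream.close(), not from an ordinary flush. A standalone AspectJ sketch of the same composition, with hypothetical names:

    class Stream {
      void flushInternal() {}
      void close()  { flushInternal(); }  // matches: the call is within close()
      void hflush() { flushInternal(); }  // no match: not within close()
    }

    aspect CloseTracing {
      before(Stream s): call(void flushInternal())
          && withincode(void Stream.close()) && this(s) {
        System.out.println("before close-time flush on " + s);
      }
    }
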
+ 38 - 4
src/test/aop/org/apache/hadoop/hdfs/server/datanode/BlockReceiverAspects.aj

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import java.io.DataInput;
 import java.io.IOException;
 import java.io.OutputStream;
 
@@ -25,13 +26,15 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fi.DataTransferTestUtil;
 import org.apache.hadoop.fi.ProbabilityModel;
 import org.apache.hadoop.fi.DataTransferTestUtil.DataTransferTest;
+import org.apache.hadoop.hdfs.protocol.DataTransferProtocol.Status;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
 
 /**
  * This aspect takes care of faults injected into the datanode.BlockReceiver
  * class.
  */
-public aspect BlockReceiverAspects {
+public privileged aspect BlockReceiverAspects {
   public static final Log LOG = LogFactory.getLog(BlockReceiverAspects.class);
 
   pointcut callReceivePacket(BlockReceiver blockreceiver) :
@@ -69,7 +72,9 @@ public aspect BlockReceiverAspects {
       ) throws IOException : pipelineClose(blockreceiver, offsetInBlock, seqno,
           lastPacketInBlock, len, endOfHeader) {
     if (len == 0) {
-      LOG.info("FI: pipelineClose, offsetInBlock=" + offsetInBlock
+      final DatanodeRegistration dr = blockreceiver.getDataNode().getDatanodeRegistration();
+      LOG.info("FI: pipelineClose, datanode=" + dr
+          + ", offsetInBlock=" + offsetInBlock
           + ", seqno=" + seqno
           + ", lastPacketInBlock=" + lastPacketInBlock
           + ", len=" + len
@@ -77,9 +82,38 @@ public aspect BlockReceiverAspects {
   
       final DataTransferTest test = DataTransferTestUtil.getDataTransferTest();
       if (test != null) {
-        test.fiPipelineClose.run(
-            blockreceiver.getDataNode().getDatanodeRegistration());
+        test.fiPipelineClose.run(dr);
       }
     }
   }
+
+  pointcut pipelineAck(BlockReceiver.PacketResponder packetresponder) :
+    call (Status Status.read(DataInput))
+      && this(packetresponder);
+
+  after(BlockReceiver.PacketResponder packetresponder) throws IOException
+      : pipelineAck(packetresponder) {
+    final DatanodeRegistration dr = packetresponder.receiver.getDataNode().getDatanodeRegistration();
+    LOG.info("FI: fiPipelineAck, datanode=" + dr);
+
+    final DataTransferTest test = DataTransferTestUtil.getDataTransferTest();
+    if (test != null) {
+      test.fiPipelineAck.run(dr);
+    }
+  }
+
+  pointcut blockFileClose(BlockReceiver blockreceiver) :
+    call(void close())
+      && withincode(void BlockReceiver.close())
+      && this(blockreceiver);
+
+  after(BlockReceiver blockreceiver) throws IOException : blockFileClose(blockreceiver) {
+    final DatanodeRegistration dr = blockreceiver.getDataNode().getDatanodeRegistration();
+    LOG.info("FI: blockFileClose, datanode=" + dr);
+
+    final DataTransferTest test = DataTransferTestUtil.getDataTransferTest();
+    if (test != null) {
+      test.fiBlockFileClose.run(dr);
+    }
+  }
 }

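Note that the aspect is now declared privileged: a privileged aspect may access private members of the classes it advises, which the pipelineAck advice needs in order to reach packetresponder.receiver. A minimal standalone illustration, with hypothetical names:

    class Responder {
      private final String receiver = "r1";  // private to Responder
    }

    privileged aspect Peek {
      after(Responder r): execution(Responder.new()) && this(r) {
        // Reading r.receiver is legal only because Peek is privileged.
        System.out.println("receiver=" + r.receiver);
      }
    }
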
+ 78 - 3
src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.fi.DataTransferTestUtil;
 import org.apache.hadoop.fi.FiTestUtil;
 import org.apache.hadoop.fi.DataTransferTestUtil.DataTransferTest;
 import org.apache.hadoop.fi.DataTransferTestUtil.DoosAction;
+import org.apache.hadoop.fi.DataTransferTestUtil.IoeAction;
 import org.apache.hadoop.fi.DataTransferTestUtil.OomAction;
 import org.apache.hadoop.fi.DataTransferTestUtil.SleepAction;
 import org.apache.hadoop.fi.DataTransferTestUtil.VerificationAction;
@@ -32,11 +33,10 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
-
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -310,6 +310,37 @@ public class TestFiDataTransferProtocol {
     write1byte(methodName);
   }
 
+  private static void run41_43(String name, int i) throws IOException {
+    runPipelineCloseTest(name, new SleepAction(name, i, 3000));
+  }
+  
+  /**
+   * Pipeline close with DN0 very slow but it won't lead to a timeout.
+   * Client finishes close successfully.
+   */
+  @Test
+  public void pipeline_Fi_41() throws IOException {
+    run41_43(FiTestUtil.getMethodName(), 0);
+  }
+
+  /**
+   * Pipeline close with DN1 very slow but it won't lead to a timeout.
+   * Client finishes close successfully.
+   */
+  @Test
+  public void pipeline_Fi_42() throws IOException {
+    run41_43(FiTestUtil.getMethodName(), 1);
+  }
+
+  /**
+   * Pipeline close with DN2 very slow but it won't lead to a timeout.
+   * Client finishes close successfully.
+   */
+  @Test
+  public void pipeline_Fi_43() throws IOException {
+    run41_43(FiTestUtil.getMethodName(), 2);
+  }
+
   /**
    * Pipeline close:
    * DN0 throws an OutOfMemoryException
@@ -345,4 +376,48 @@ public class TestFiDataTransferProtocol {
     final String methodName = FiTestUtil.getMethodName();
     runPipelineCloseTest(methodName, new OomAction(methodName, 2));
   }
+
+  private static void runBlockFileCloseTest(String methodName,
+      Action<DatanodeID> a) throws IOException {
+    FiTestUtil.LOG.info("Running " + methodName + " ...");
+    final DataTransferTest t = (DataTransferTest) DataTransferTestUtil
+        .initTest();
+    t.fiBlockFileClose.set(a);
+    write1byte(methodName);
+  }
+
+  private static void run49_51(String name, int i) throws IOException {
+    runBlockFileCloseTest(name, new IoeAction(name, i, "DISK ERROR"));
+  }
+
+  /**
+   * Pipeline close:
+   * DN0 throws a disk error exception when it is closing the block file.
+   * Client gets an IOException and determines DN0 is bad.
+   */
+  @Test
+  public void pipeline_Fi_49() throws IOException {
+    run49_51(FiTestUtil.getMethodName(), 0);
+  }
+
+  /**
+   * Pipeline close:
+   * DN1 throws a disk error exception when it is closing the block file.
+   * Client gets an IOException and determines DN1 is bad.
+   */
+  @Test
+  public void pipeline_Fi_50() throws IOException {
+    run49_51(FiTestUtil.getMethodName(), 1);
+  }
+
+  /**
+   * Pipeline close:
+   * DN2 throws a disk error exception when it is closing the block file.
+   * Client gets an IOException and determines DN2 is bad.
+   */
+  @Test
+  public void pipeline_Fi_51() throws IOException {
+    run49_51(FiTestUtil.getMethodName(), 2);
+  }
 }
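
The new cases are plain JUnit 4 tests, so a single one can be driven programmatically once the aspects have been woven in; a minimal sketch, assuming JUnit 4 on the classpath and this hypothetical runner class living in the same package as the test:

    import org.junit.runner.JUnitCore;
    import org.junit.runner.Request;
    import org.junit.runner.Result;

    // Run just pipeline_Fi_49 and report the outcome.
    public class RunOneFiTest {
      public static void main(String[] args) {
        final Result r = new JUnitCore().run(
            Request.method(TestFiDataTransferProtocol.class, "pipeline_Fi_49"));
        System.out.println("ran=" + r.getRunCount()
            + ", failures=" + r.getFailureCount());
      }
    }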