HDFS-10178. Permanent write failures can happen if pipeline recoveries occur for the first packet. Contributed by Kihwal Lee.

(cherry picked from commit a7d1fb0cd2fdbf830602eb4dbbd9bbe62f4d5584)
(cherry picked from commit 69d4fa0deb27a4992cee15203277b2caa9d26d79)
Kihwal Lee committed 9 years ago (commit 3b39721f30)
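
In short, per the diffs below: if the pipeline breaks after only the header portion of the first packet has been forwarded, the downstream replica is left with a zero-byte block data file and a meta file containing nothing but the checksum header. BlockSender previously required the meta file to be strictly longer than the header before it would open a checksum stream, so a pipeline-recovery transfer of such a replica ignored the recorded checksum type; the relaxed >= check below addresses that, and the new fault-injection hook plus the test reproduce the scenario.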

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -39,6 +39,9 @@ Release 2.6.5 - UNRELEASED
     HDFS-9530. ReservedSpace is not cleared for abandoned Blocks.
     (Brahma Reddy Battula)
 
+    HDFS-10178. Permanent write failures can happen if pipeline recoveries
+    occur for the first packet (kihwal)
+
 Release 2.6.4 - 2016-02-11
 
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -548,6 +548,8 @@ class BlockReceiver implements Closeable {
     if (mirrorOut != null && !mirrorError) {
       try {
         long begin = Time.monotonicNow();
+        // For testing. Normally no-op.
+        DataNodeFaultInjector.get().stopSendingPacketDownstream();
         packetReceiver.mirrorPacketTo(mirrorOut);
         mirrorOut.flush();
         long now = Time.monotonicNow();
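
The injection point is placed just before mirrorPacketTo, so a test-supplied injector can stall the datanode before any part of a packet is forwarded downstream; the test added below uses exactly this hook to hold the first packet until the client's socket timeout triggers pipeline recovery.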

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java

@@ -283,11 +283,15 @@ class BlockSender implements java.io.Closeable {
 
             // The meta file will contain only the header if the NULL checksum
             // type was used, or if the replica was written to transient storage.
+            // Also, when only the header portion of a data packet was
+            // transferred and then the pipeline breaks, the meta file can
+            // contain only the header and the block data file can be 0 bytes.
             // Checksum verification is not performed for replicas on transient
             // storage.  The header is important for determining the checksum
             // type later when lazy persistence copies the block to non-transient
             // storage and computes the checksum.
-            if (metaIn.getLength() > BlockMetadataHeader.getHeaderSize()) {
+            if (!replica.isOnTransientStorage() &&
+                metaIn.getLength() >= BlockMetadataHeader.getHeaderSize()) {
               checksumIn = new DataInputStream(new BufferedInputStream(
                   metaIn, HdfsConstants.IO_FILE_BUFFER_SIZE));
   
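
This conditional is the core of the fix. A minimal sketch of the decision, outside the Hadoop sources and with a hypothetical helper name, where metaFileLen is the meta file length and headerSize is whatever BlockMetadataHeader.getHeaderSize() returns:

  // Hedged sketch, not the actual BlockSender code: should a checksum stream
  // be opened for this replica, given the meta file length?
  static boolean shouldOpenChecksumStream(long metaFileLen, int headerSize,
                                          boolean onTransientStorage) {
    // Before the patch: a meta file holding only the header (e.g. a 0-byte
    // block left behind by a broken first-packet pipeline) failed the strict
    // '>' test, so the sender fell back to a NULL checksum.
    //   return metaFileLen > headerSize;
    // After the patch: a complete header is enough; replicas on transient
    // storage are excluded because checksum verification is skipped for them.
    return !onTransientStorage && metaFileLen >= headerSize;
  }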

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java

@@ -48,6 +48,8 @@ public class DataNodeFaultInjector {
 
   public void sendShortCircuitShmResponse() throws IOException {}
 
+  public void stopSendingPacketDownstream() throws IOException {}
+
   public void noRegistration() throws IOException { }
 
   public void failMirrorConnection() throws IOException { }
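
DataNodeFaultInjector follows the usual static get()/set() fault-injection pattern visible in the test below: production code calls a no-op hook, and a test swaps in a subclass that misbehaves at that point. A simplified sketch of the pattern (class name and field are illustrative, not the Hadoop source):

  import java.io.IOException;

  // Simplified sketch of the fault-injector pattern; not the Hadoop class itself.
  public class FaultInjectorSketch {
    private static FaultInjectorSketch instance = new FaultInjectorSketch();

    public static FaultInjectorSketch get() { return instance; }
    public static void set(FaultInjectorSketch injector) { instance = injector; }

    // No-op in production; a test overrides this to stall or fail the
    // datanode right before it forwards a packet downstream.
    public void stopSendingPacketDownstream() throws IOException {}
  }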

+ 53 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java

@@ -375,4 +375,57 @@ public class TestClientProtocolForPipelineRecovery {
       }
     }
   }
+
+  /**
+   * Test to make sure the checksum is set correctly after pipeline
+   * recovery transfers a 0-byte partial block. If it fails, the test case
+   * will say "java.io.IOException: Failed to replace a bad datanode
+   * on the existing pipeline due to no more good datanodes being
+   * available to try."  This indicates there was a real failure
+   * after the staged failure.
+   */
+  @Test
+  public void testZeroByteBlockRecovery() throws Exception {
+    // Make the first datanode fail once. With 3 nodes and a block being
+    // created with 2 replicas, anything more than this planned failure
+    // will cause a test failure.
+    DataNodeFaultInjector dnFaultInjector = new DataNodeFaultInjector() {
+      int tries = 1;
+      @Override
+      public void stopSendingPacketDownstream() throws IOException {
+        if (tries > 0) {
+          tries--;
+          try {
+            Thread.sleep(60000);
+          } catch (InterruptedException ie) {
+            throw new IOException("Interrupted while sleeping. Bailing out.");
+          }
+        }
+      }
+    };
+    DataNodeFaultInjector oldDnInjector = DataNodeFaultInjector.get();
+    DataNodeFaultInjector.set(dnFaultInjector);
+
+    Configuration conf = new HdfsConfiguration();
+    conf.set(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, "1000");
+    conf.set(DFSConfigKeys.
+        DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY, "ALWAYS");
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+      cluster.waitActive();
+
+      FileSystem fs = cluster.getFileSystem();
+      FSDataOutputStream out = fs.create(new Path("noheartbeat.dat"), (short)2);
+      out.write(0x31);
+      out.hflush();
+      out.close();
+
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      DataNodeFaultInjector.set(oldDnInjector);
+    }
+  }
 }
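
The test forces the recovery by combining a 1-second client socket timeout with the ALWAYS replace-datanode-on-failure policy, so the client abandons the stalled datanode while the very first packet is still outstanding; per the javadoc above, a wrong checksum on the recovered replica surfaces as the quoted "Failed to replace a bad datanode" IOException. A read-back check is not part of the patch, but a hedged sketch of one, reusing fs and the path from the test and assuming the FSDataInputStream import plus a static import of org.junit.Assert.assertEquals, could look like:

  // Not in the patch: optionally confirm the single byte written before the
  // staged failure is intact after pipeline recovery.
  try (FSDataInputStream in = fs.open(new Path("noheartbeat.dat"))) {
    assertEquals(0x31, in.read());  // the byte written by out.write(0x31)
    assertEquals(-1, in.read());    // end of the one-byte file
  }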