瀏覽代碼

HDFS-9530. ReservedSpace is not cleared for abandoned Blocks (Brahma Reddy Battula)

Arpit Agarwal 8 年之前
父節點
當前提交
3f87efc0c8

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -30,6 +30,9 @@ Release 2.6.5 - UNRELEASED
     HDFS-8581. ContentSummary on / skips further counts on yielding lock
     HDFS-8581. ContentSummary on / skips further counts on yielding lock
     (J.Andreina via vinayakumarb)
     (J.Andreina via vinayakumarb)
 
 
+    HDFS-9530. ReservedSpace is not cleared for abandoned Blocks.
+    (Brahma Reddy Battula)
+
 Release 2.6.4 - 2016-02-11
 Release 2.6.4 - 2016-02-11
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeFaultInjector.java

@@ -49,4 +49,6 @@ public class DataNodeFaultInjector {
   public void sendShortCircuitShmResponse() throws IOException {}
   public void sendShortCircuitShmResponse() throws IOException {}
 
 
   public void noRegistration() throws IOException { }
   public void noRegistration() throws IOException { }
+
+  public void failMirrorConnection() throws IOException { }
 }
 }

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

@@ -665,6 +665,7 @@ class DataXceiver extends Receiver implements Runnable {
         mirrorTarget = NetUtils.createSocketAddr(mirrorNode);
         mirrorTarget = NetUtils.createSocketAddr(mirrorNode);
         mirrorSock = datanode.newSocket();
         mirrorSock = datanode.newSocket();
         try {
         try {
+          DataNodeFaultInjector.get().failMirrorConnection();
           int timeoutValue = dnConf.socketTimeout
           int timeoutValue = dnConf.socketTimeout
               + (HdfsServerConstants.READ_TIMEOUT_EXTENSION * targets.length);
               + (HdfsServerConstants.READ_TIMEOUT_EXTENSION * targets.length);
           int writeTimeout = dnConf.socketWriteTimeout + 
           int writeTimeout = dnConf.socketWriteTimeout + 

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -86,6 +86,7 @@ import org.apache.hadoop.hdfs.server.datanode.UnexpectedReplicaStateException;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream;
+import org.apache.hadoop.hdfs.server.datanode.ReplicaInPipelineInterface;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaInputStreams;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaOutputStreams;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaOutputStreams;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.RollingLogs;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.RollingLogs;
@@ -1606,7 +1607,10 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
               +  ". Parent not found for file " + f);
               +  ". Parent not found for file " + f);
           continue;
           continue;
         }
         }
-        volumeMap.remove(bpid, invalidBlks[i]);
+        ReplicaInfo removing = volumeMap.remove(bpid, invalidBlks[i]);
+        if (removing instanceof ReplicaInPipelineInterface) {
+          ((ReplicaInPipelineInterface) removing).releaseAllBytesReserved();
+        }
       }
       }
 
 
       if (v.isTransientStorage()) {
       if (v.isTransientStorage()) {

+ 47 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestRbwSpaceReservation.java

@@ -32,6 +32,8 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.*;
 import org.apache.hadoop.hdfs.*;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -65,7 +67,7 @@ public class TestRbwSpaceReservation {
   private DistributedFileSystem fs = null;
   private DistributedFileSystem fs = null;
   private DFSClient client = null;
   private DFSClient client = null;
   FsVolumeImpl singletonVolume = null;
   FsVolumeImpl singletonVolume = null;
-
+  private DataNodeFaultInjector old = null;
   private static Random rand = new Random();
   private static Random rand = new Random();
 
 
   private void initConfig(int blockSize) {
   private void initConfig(int blockSize) {
@@ -414,6 +416,50 @@ public class TestRbwSpaceReservation {
     checkReservedSpace(expectedFile2Reserved);
     checkReservedSpace(expectedFile2Reserved);
   }
   }
 
 
+  @Test(timeout = 30000)
+  public void testReservedSpaceForPipelineRecovery()
+      throws Exception {
+    final short replication = 3;
+    startCluster(BLOCK_SIZE, replication, -1);
+
+    final String methodName = GenericTestUtils.getMethodName();
+    final Path file = new Path("/" + methodName + ".01.dat");
+
+    old = DataNodeFaultInjector.get();
+    // Fail only the first mirror connection. NOTE(review): `old` is not restored in this method - confirm teardown does.
+    DataNodeFaultInjector.set(new DataNodeFaultInjector() {
+      private int tries = 0;
+
+      @Override
+      public void failMirrorConnection() throws IOException {
+        if (tries++ == 0) {
+          throw new IOException("Failing Mirror for space reservation");
+        }
+      }
+    });
+    // Write 1 byte to the file and close the stream normally.
+    FSDataOutputStream os = fs.create(file, replication);
+    os.write(new byte[1]);
+    os.close();
+    // Trigger block reports, then poll every volume on every DataNode
+    // until its space reserved for RBW replicas reads zero.
+    cluster.triggerBlockReports();
+    for (final DataNode dn : cluster.getDataNodes()) {
+      for (FsVolumeSpi fsVolume : dn.getFSDataset().getVolumes()) {
+        {
+          final FsVolumeImpl volume = (FsVolumeImpl) fsVolume;
+          GenericTestUtils.waitFor(new Supplier<Boolean>() {
+            @Override public Boolean get() {
+              LOG.info("dn " + dn.getDisplayName() + " space : " + volume
+                  .getReservedForRbw());
+              return (volume.getReservedForRbw() == 0);
+            }
+          }, 100, Integer.MAX_VALUE); // Wait until the test times out.
+        }
+      }
+    }
+  }
+
   private void checkReservedSpace(final long expectedReserved)
   private void checkReservedSpace(final long expectedReserved)
       throws TimeoutException, InterruptedException, IOException {
       throws TimeoutException, InterruptedException, IOException {
     for (final DataNode dn : cluster.getDataNodes()) {
     for (final DataNode dn : cluster.getDataNodes()) {