
HDFS-16942. Send error to datanode if FBR is rejected due to bad lease (#5460)

Stephen O'Donnell 2 years ago
parent commit: ca6f5afb6d

+ 6 - 0
hadoop-client-modules/hadoop-client-api/pom.xml

@@ -126,6 +126,12 @@
                         <exclude>org/apache/hadoop/yarn/client/api/package-info.class</exclude>
                       </excludes>
                     </filter>
+                    <filter>
+                      <artifact>org.apache.hadoop:*</artifact>
+                      <excludes>
+                        <exclude>org/apache/hadoop/hdfs/server/protocol/package-info.class</exclude>
+                      </excludes>
+                    </filter>
                   </filters>
                   <relocations>
                     <relocation>

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java

@@ -63,6 +63,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports;
@@ -791,6 +792,9 @@ class BPServiceActor implements Runnable {
           shouldServiceRun = false;
           return;
         }
+        if (InvalidBlockReportLeaseException.class.getName().equals(reClass)) {
+          fullBlockReportLeaseId = 0;
+        }
         LOG.warn("RemoteException in offerService", re);
         sleepAfterException();
       } catch (IOException e) {
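
With this change the DataNode recognises the rejection inside its existing RemoteException handling and clears the cached lease id, so the next heartbeat requests a fresh full block report lease rather than retrying with a stale one. Below is a minimal, self-contained sketch of that class-name check; the class name FbrLeaseRetrySketch and the surrounding structure are illustrative only, not the actual BPServiceActor code.

import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
import org.apache.hadoop.ipc.RemoteException;

/** Illustrative sketch, not the actual BPServiceActor implementation. */
public class FbrLeaseRetrySketch {
  private long fullBlockReportLeaseId = 0xBEEFL; // stale lease id, for illustration

  void handleRemoteException(RemoteException re) {
    // RPC delivers server-side exceptions as RemoteException; compare the
    // original class name to detect the FBR lease rejection.
    if (InvalidBlockReportLeaseException.class.getName().equals(re.getClassName())) {
      // Clearing the cached id makes the next heartbeat ask the NameNode
      // for a new full block report lease.
      fullBlockReportLeaseId = 0;
    }
  }
}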

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java

@@ -172,6 +172,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
 import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
@@ -1651,6 +1652,8 @@ public class NameNodeRpcServer implements NamenodeProtocols {
             bm.processReport(nodeReg, reports[index].getStorage(),
                 blocks, context));
         }
+      } else {
+        throw new InvalidBlockReportLeaseException(context.getReportId(), context.getLeaseId());
       }
     } catch (UnregisteredNodeException une) {
       LOG.warn("Datanode {} is attempting to report but not register yet.",

+ 41 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/InvalidBlockReportLeaseException.java

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.protocol;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * This exception is thrown when a datanode sends a full block report but it is
+ * rejected by the Namenode due to an invalid lease (expired or otherwise).
+ *
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class InvalidBlockReportLeaseException extends IOException {
+  /** for java.io.Serializable. */
+  private static final long serialVersionUID = 1L;
+
+  public InvalidBlockReportLeaseException(long blockReportID, long leaseID) {
+    super("Block report 0x" + Long.toHexString(blockReportID) + " was rejected as lease 0x"
+        + Long.toHexString(leaseID) +  " is invalid");
+  }
+}
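
The constructor renders both ids in hex. A small standalone example of the message a DataNode would see in its logs (the ids are made up for illustration):

import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;

public class LeaseMessageDemo {
  public static void main(String[] args) {
    InvalidBlockReportLeaseException e =
        new InvalidBlockReportLeaseException(0x1234L, 0xABCDL);
    // Prints: Block report 0x1234 was rejected as lease 0xabcd is invalid
    System.out.println(e.getMessage());
  }
}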

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/package-info.java

@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package provides classes for the namenode server protocol.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+package org.apache.hadoop.hdfs.server.protocol;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

+ 69 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockReportLease.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand;
 import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
@@ -41,12 +42,14 @@ import org.junit.Test;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.spy;
@@ -137,6 +140,72 @@ public class TestBlockReportLease {
     }
   }
 
+  @Test
+  public void testExceptionThrownWhenFBRLeaseExpired() throws Exception {
+    HdfsConfiguration conf = new HdfsConfiguration();
+    Random rand = new Random();
+
+    try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(1).build()) {
+      cluster.waitActive();
+
+      FSNamesystem fsn = cluster.getNamesystem();
+      BlockManager blockManager = fsn.getBlockManager();
+      BlockManager spyBlockManager = spy(blockManager);
+      fsn.setBlockManagerForTesting(spyBlockManager);
+      String poolId = cluster.getNamesystem().getBlockPoolId();
+
+      NamenodeProtocols rpcServer = cluster.getNameNodeRpc();
+
+      // Test with a single DataNode reporting to the NameNode
+      DataNode dn = cluster.getDataNodes().get(0);
+      DatanodeDescriptor datanodeDescriptor = spyBlockManager
+          .getDatanodeManager().getDatanode(dn.getDatanodeId());
+
+      DatanodeRegistration dnRegistration = dn.getDNRegistrationForBP(poolId);
+      StorageReport[] storages = dn.getFSDataset().getStorageReports(poolId);
+
+      // Send heartbeat and request full block report lease
+      HeartbeatResponse hbResponse = rpcServer.sendHeartbeat(
+          dnRegistration, storages, 0, 0, 0, 0, 0, null, true,
+          SlowPeerReports.EMPTY_REPORT, SlowDiskReports.EMPTY_REPORT);
+
+      // Remove the full block report lease for this DataNode
+      spyBlockManager.getBlockReportLeaseManager()
+          .removeLease(datanodeDescriptor);
+
+      ExecutorService pool = Executors.newFixedThreadPool(1);
+
+      // Trigger sendBlockReport
+      BlockReportContext brContext = new BlockReportContext(1, 0,
+          rand.nextLong(), hbResponse.getFullBlockReportLeaseId());
+      Future<DatanodeCommand> sendBRfuturea = pool.submit(() -> {
+        // Build a report containing 100 blocks for each storage
+        DatanodeStorage[] datanodeStorages
+            = new DatanodeStorage[storages.length];
+        for (int i = 0; i < storages.length; i++) {
+          datanodeStorages[i] = storages[i].getStorage();
+        }
+        StorageBlockReport[] reports = createReports(datanodeStorages, 100);
+
+        // Send blockReport
+        return rpcServer.blockReport(dnRegistration, poolId, reports,
+            brContext);
+      });
+
+      // The lease was removed above, so retrieving the result should throw
+      ExecutionException exception = null;
+      try {
+        sendBRfuturea.get();
+      } catch (ExecutionException e) {
+        exception = e;
+      }
+      assertNotNull(exception);
+      assertEquals(InvalidBlockReportLeaseException.class,
+          exception.getCause().getClass());
+    }
+  }
+
   @Test
   public void testCheckBlockReportLeaseWhenDnUnregister() throws Exception {
     HdfsConfiguration conf = new HdfsConfiguration();

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
@@ -39,7 +40,6 @@ import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
-import java.net.ConnectException;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -1187,8 +1187,9 @@ public class TestBPOfferService {
                 // just reject and wait until DN request for a new leaseId
                 if(leaseId == 1) {
                   firstLeaseId = leaseId;
-                  throw new ConnectException(
-                          "network is not reachable for test. ");
+                  InvalidBlockReportLeaseException e =
+                      new InvalidBlockReportLeaseException(context.getReportId(), 1);
+                  throw new RemoteException(e.getClass().getName(), e.getMessage());
                 } else {
                   secondLeaseId = leaseId;
                   return null;
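
The mock now rejects the first report the same way a patched NameNode would: over RPC, the server-side exception reaches the client wrapped in a RemoteException carrying the original class name, which is exactly what the new check in BPServiceActor compares against. A short illustrative round trip (not part of the patch):

import org.apache.hadoop.hdfs.server.protocol.InvalidBlockReportLeaseException;
import org.apache.hadoop.ipc.RemoteException;

public class RemoteExceptionWrapDemo {
  public static void main(String[] args) {
    InvalidBlockReportLeaseException cause =
        new InvalidBlockReportLeaseException(0x1L, 0x2L); // illustrative ids
    RemoteException re =
        new RemoteException(cause.getClass().getName(), cause.getMessage());
    // Prints true: the class-name comparison in BPServiceActor matches.
    System.out.println(
        InvalidBlockReportLeaseException.class.getName().equals(re.getClassName()));
  }
}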