Forráskód Böngészése

HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun.

Erik Krogen 6 éve
szülő
commit
7146e67893

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -91,6 +91,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
 import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER;
 import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
+import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE;
+import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER;
 import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import static org.apache.hadoop.util.Time.now;
@@ -1845,7 +1847,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
             SafeModeException se = newSafemodeException(
                 "Zero blocklocations for " + srcArg);
             if (haEnabled && haContext != null &&
-                haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
+                (haContext.getState().getServiceState() == ACTIVE ||
+                    haContext.getState().getServiceState() == OBSERVER)) {
               throw new RetriableException(se);
             } else {
               throw se;

+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import static org.mockito.Mockito.spy;
 
@@ -214,6 +215,12 @@ public class NameNodeAdapter {
     return fsnSpy;
   }
 
+  public static BlockManager spyOnBlockManager(NameNode nn) {
+    BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager());
+    nn.getNamesystem().setBlockManagerForTesting(bmSpy);
+    return bmSpy;
+  }
+
   public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
     ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
     fsn.setFsLockForTests(spy);

+ 67 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java

@@ -25,8 +25,15 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.io.retry.RetryInvocationHandler;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -39,9 +46,12 @@ import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.Proxy;
 import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.TimeUnit;
 
+import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
@@ -49,6 +59,13 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyBoolean;
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.anyShort;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
 
 // Main unit tests for ObserverNode
 public class TestObserverNode {
@@ -300,6 +317,56 @@ public class TestObserverNode {
     assertEquals(0, rc);
   }
 
+  /**
+   * Test the case where Observer should throw RetriableException, just like
+   * active NN, for certain open() calls where block locations are not
+   * available. See HDFS-13898 for details.
+   */
+  @Test
+  public void testObserverNodeSafeModeWithBlockLocations() throws Exception {
+    setUpCluster(1);
+    setObserverRead(true);
+
+    // Avoid starting DNs for the mini cluster.
+    BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]);
+    doNothing().when(bmSpy)
+        .verifyReplication(anyString(), anyShort(), anyString());
+
+    // Create a new file - the request should go to active.
+    dfs.createNewFile(testPath);
+    assertSentTo(0);
+
+    rollEditLogAndTail(0);
+    dfs.open(testPath);
+    assertSentTo(2);
+
+    // Set observer to safe mode.
+    dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+
+    // Mock block manager for observer to generate some fake blocks which
+    // will trigger the (retriable) safe mode exception.
+    final DatanodeInfo[] empty = {};
+    bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]);
+    doAnswer((invocation) -> {
+      ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L));
+      LocatedBlock fakeBlock = new LocatedBlock(b, empty);
+      List<LocatedBlock> fakeBlocks = new ArrayList<>();
+      fakeBlocks.add(fakeBlock);
+      return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null);
+    }).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(),
+        anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any());
+
+    // Open the file again - it should throw retriable exception and then
+    // failover to active.
+    dfs.open(testPath);
+    assertSentTo(0);
+
+    // Remove safe mode on observer, request should still go to it.
+    dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+    dfs.open(testPath);
+    assertSentTo(2);
+  }
+
   // TODO this does not currently work because fetching the service state from
   // e.g. the StandbyNameNode also waits for the transaction ID to catch up.
   // This is disabled pending HDFS-13872 and HDFS-13749.