Browse Source

HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun.

Erik Krogen 6 years ago
parent
commit
b74a7dbf88

+ 10 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -93,6 +93,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LI
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
 import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
 import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
+import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE;
+import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER;
 
 
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
 import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
 import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
@@ -463,7 +465,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
 
 
   /** The namespace tree. */
   /** The namespace tree. */
   FSDirectory dir;
   FSDirectory dir;
-  private final BlockManager blockManager;
+  private BlockManager blockManager;
   private final SnapshotManager snapshotManager;
   private final SnapshotManager snapshotManager;
   private final CacheManager cacheManager;
   private final CacheManager cacheManager;
   private final DatanodeStatistics datanodeStatistics;
   private final DatanodeStatistics datanodeStatistics;
@@ -1966,7 +1968,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
             SafeModeException se = newSafemodeException(
             SafeModeException se = newSafemodeException(
                 "Zero blocklocations for " + srcArg);
                 "Zero blocklocations for " + srcArg);
             if (haEnabled && haContext != null &&
             if (haEnabled && haContext != null &&
-                haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
+                (haContext.getState().getServiceState() == ACTIVE ||
+                    haContext.getState().getServiceState() == OBSERVER)) {
               throw new RetriableException(se);
               throw new RetriableException(se);
             } else {
             } else {
               throw se;
               throw se;
@@ -6301,6 +6304,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     return blockManager;
     return blockManager;
   }
   }
 
 
+  @VisibleForTesting
+  public void setBlockManagerForTesting(BlockManager bm) {
+    this.blockManager = bm;
+  }
+
   /** @return the FSDirectory. */
   /** @return the FSDirectory. */
   @Override
   @Override
   public FSDirectory getFSDirectory() {
   public FSDirectory getFSDirectory() {

+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java

@@ -17,6 +17,7 @@
  */
  */
 package org.apache.hadoop.hdfs.server.namenode;
 package org.apache.hadoop.hdfs.server.namenode;
 
 
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.spy;
 
 
@@ -223,6 +224,12 @@ public class NameNodeAdapter {
     return fsnSpy;
     return fsnSpy;
   }
   }
 
 
+  public static BlockManager spyOnBlockManager(NameNode nn) {
+    BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager());
+    nn.getNamesystem().setBlockManagerForTesting(bmSpy);
+    return bmSpy;
+  }
+
   public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
   public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
     ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
     ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
     fsn.setFsLockForTests(spy);
     fsn.setFsLockForTests(spy);

+ 67 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java

@@ -24,8 +24,15 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
 import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.io.retry.RetryInvocationHandler;
 import org.apache.hadoop.io.retry.RetryInvocationHandler;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -38,9 +45,12 @@ import java.io.File;
 import java.io.IOException;
 import java.io.IOException;
 import java.lang.reflect.Proxy;
 import java.lang.reflect.Proxy;
 import java.net.URI;
 import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeUnit;
 
 
+import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
@@ -48,6 +58,13 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 import static org.junit.Assert.fail;
 
 
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyBoolean;
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.anyShort;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
 
 
 // Main unit tests for ObserverNode
 // Main unit tests for ObserverNode
 public class TestObserverNode {
 public class TestObserverNode {
@@ -299,6 +316,56 @@ public class TestObserverNode {
     assertEquals(0, rc);
     assertEquals(0, rc);
   }
   }
 
 
+  /**
+   * Test the case where Observer should throw RetriableException, just like
+   * active NN, for certain open() calls where block locations are not
+   * available. See HDFS-13898 for details.
+   */
+  @Test
+  public void testObserverNodeSafeModeWithBlockLocations() throws Exception {
+    setUpCluster(1);
+    setObserverRead(true);
+
+    // Avoid starting DNs for the mini cluster.
+    BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]);
+    doNothing().when(bmSpy)
+        .verifyReplication(anyString(), anyShort(), anyString());
+
+    // Create a new file - the request should go to active.
+    dfs.createNewFile(testPath);
+    assertSentTo(0);
+
+    rollEditLogAndTail(0);
+    dfs.open(testPath);
+    assertSentTo(2);
+
+    // Set observer to safe mode.
+    dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+
+    // Mock block manager for observer to generate some fake blocks which
+    // will trigger the (retriable) safe mode exception.
+    final DatanodeInfo[] empty = {};
+    bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]);
+    doAnswer((invocation) -> {
+      ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L));
+      LocatedBlock fakeBlock = new LocatedBlock(b, empty);
+      List<LocatedBlock> fakeBlocks = new ArrayList<>();
+      fakeBlocks.add(fakeBlock);
+      return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null);
+    }).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(),
+        anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any());
+
+    // Open the file again - it should throw retriable exception and then
+    // failover to active.
+    dfs.open(testPath);
+    assertSentTo(0);
+
+    // Remove safe mode on observer, request should still go to it.
+    dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+    dfs.open(testPath);
+    assertSentTo(2);
+  }
+
   // TODO this does not currently work because fetching the service state from
   // TODO this does not currently work because fetching the service state from
   // e.g. the StandbyNameNode also waits for the transaction ID to catch up.
   // e.g. the StandbyNameNode also waits for the transaction ID to catch up.
   // This is disabled pending HDFS-13872 and HDFS-13749.
   // This is disabled pending HDFS-13872 and HDFS-13749.