소스 검색

HDFS-13901. INode access time is ignored because of race between open and rename. Contributed by Jinglun.

Wei-Chiu Chuang 5 년 전
부모
커밋
72003b19bf

+ 7 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java

@@ -186,7 +186,7 @@ class FSDirStatAndListingOp {
       boolean updateAccessTime = fsd.isAccessTimeSupported()
           && !iip.isSnapshot()
           && now > inode.getAccessTime() + fsd.getAccessTimePrecision();
-      return new GetBlockLocationsResult(updateAccessTime, blocks);
+      return new GetBlockLocationsResult(updateAccessTime, blocks, iip);
     } finally {
       fsd.readUnlock();
     }
@@ -599,13 +599,18 @@ class FSDirStatAndListingOp {
   static class GetBlockLocationsResult {
     final boolean updateAccessTime;
     final LocatedBlocks blocks;
+    private final INodesInPath iip;
     boolean updateAccessTime() {
       return updateAccessTime;
     }
+    public INodesInPath getIIp() {
+      return iip;
+    }
     private GetBlockLocationsResult(
-        boolean updateAccessTime, LocatedBlocks blocks) {
+        boolean updateAccessTime, LocatedBlocks blocks, INodesInPath iip) {
       this.updateAccessTime = updateAccessTime;
       this.blocks = blocks;
+      this.iip = iip;
     }
   }
 }

+ 10 - 26
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -1974,12 +1974,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.READ);
     GetBlockLocationsResult res = null;
     final FSPermissionChecker pc = getPermissionChecker();
+    final INode inode;
     try {
       readLock();
       try {
         checkOperation(OperationCategory.READ);
         res = FSDirStatAndListingOp.getBlockLocations(
             dir, pc, srcArg, offset, length, true);
+        inode = res.getIIp().getLastINode();
         if (isInSafeMode()) {
           for (LocatedBlock b : res.blocks.getLocatedBlocks()) {
             // if safemode & no block locations yet then throw safemodeException
@@ -2022,34 +2024,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         final long now = now();
         try {
           checkOperation(OperationCategory.WRITE);
-          /**
-           * Resolve the path again and update the atime only when the file
-           * exists.
-           *
-           * XXX: Races can still occur even after resolving the path again.
-           * For example:
-           *
-           * <ul>
-           *   <li>Get the block location for "/a/b"</li>
-           *   <li>Rename "/a/b" to "/c/b"</li>
-           *   <li>The second resolution still points to "/a/b", which is
-           *   wrong.</li>
-           * </ul>
-           *
-           * The behavior is incorrect but consistent with the one before
-           * HDFS-7463. A better fix is to change the edit log of SetTime to
-           * use inode id instead of a path.
-           */
-          final INodesInPath iip = dir.resolvePath(pc, srcArg, DirOp.READ);
-          src = iip.getPath();
-
-          INode inode = iip.getLastINode();
-          boolean updateAccessTime = inode != null &&
+          boolean updateAccessTime =
               now > inode.getAccessTime() + dir.getAccessTimePrecision();
           if (!isInSafeMode() && updateAccessTime) {
-            boolean changed = FSDirAttrOp.setTimes(dir, iip, -1, now, false);
-            if (changed) {
-              getEditLog().logTimes(src, -1, now);
+            if (!inode.isDeleted()) {
+              src = inode.getFullPathName();
+              final INodesInPath iip = dir.resolvePath(pc, src, DirOp.READ);
+              boolean changed = FSDirAttrOp.setTimes(dir, iip, -1, now, false);
+              if (changed) {
+                getEditLog().logTimes(src, -1, now);
+              }
             }
           }
         } finally {

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java

@@ -589,6 +589,18 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
     return DFSUtil.bytes2String(path);
   }
 
+  public boolean isDeleted() {
+    INode pInode = this;
+    while (pInode != null && !pInode.isRoot()) {
+      pInode = pInode.getParent();
+    }
+    if (pInode == null) {
+      return true;
+    } else {
+      return !pInode.isRoot();
+    }
+  }
+
   public byte[][] getPathComponents() {
     int n = 0;
     for (INode inode = this; inode != null; inode = inode.getParent()) {

+ 72 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.util.AbstractMap;
 import java.util.ArrayList;
 import java.util.Comparator;
@@ -27,10 +28,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.concurrent.Semaphore;
 
+import org.apache.hadoop.fs.Options;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.hdfs.AddBlockFlag;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -65,6 +69,7 @@ import org.mockito.Mockito;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY;
+import static org.junit.Assert.assertNotEquals;
 
 /**
  * Test race between delete and other operations.  For now only addBlock()
@@ -441,4 +446,71 @@ public class TestDeleteRace {
       }
     }
   }
+
+  @Test(timeout = 20000)
+  public void testOpenRenameRace() throws Exception {
+    Configuration config = new Configuration();
+    config.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
+    MiniDFSCluster dfsCluster = null;
+    final String src = "/dir/src-file";
+    final String dst = "/dir/dst-file";
+    final DistributedFileSystem hdfs;
+    try {
+      dfsCluster = new MiniDFSCluster.Builder(config).build();
+      dfsCluster.waitActive();
+      final FSNamesystem fsn = dfsCluster.getNamesystem();
+      hdfs = dfsCluster.getFileSystem();
+      DFSTestUtil.createFile(hdfs, new Path(src), 5, (short) 1, 0xFEED);
+      FileStatus status = hdfs.getFileStatus(new Path(src));
+      long accessTime = status.getAccessTime();
+
+      Semaphore openSem = new Semaphore(0);
+      Semaphore renameSem = new Semaphore(0);
+      // 1.hold writeLock.
+      // 2.start open thread.
+      // 3.openSem & yield makes sure open thread wait on readLock.
+      // 4.start rename thread.
+      // 5.renameSem & yield makes sure rename thread wait on writeLock.
+      // 6.release writeLock, it's fair lock so open thread gets read lock.
+      // 7.open thread unlocks, rename gets write lock and does rename.
+      // 8.rename thread unlocks, open thread gets write lock and update time.
+      Thread open = new Thread(() -> {
+        try {
+          openSem.release();
+          fsn.getBlockLocations("foo", src, 0, 5);
+        } catch (IOException e) {
+        }
+      });
+      Thread rename = new Thread(() -> {
+        try {
+          openSem.acquire();
+          renameSem.release();
+          fsn.renameTo(src, dst, false, Options.Rename.NONE);
+        } catch (IOException e) {
+        } catch (InterruptedException e) {
+        }
+      });
+      fsn.writeLock();
+      open.start();
+      openSem.acquire();
+      Thread.yield();
+      openSem.release();
+      rename.start();
+      renameSem.acquire();
+      Thread.yield();
+      fsn.writeUnlock();
+
+      // wait open and rename threads finish.
+      open.join();
+      rename.join();
+
+      status = hdfs.getFileStatus(new Path(dst));
+      assertNotEquals(accessTime, status.getAccessTime());
+      dfsCluster.restartNameNode(0);
+    } finally {
+      if (dfsCluster != null) {
+        dfsCluster.shutdown();
+      }
+    }
+  }
 }