
Merge branch 'trunk' into HDFS-7240

 Conflicts:
	hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
Anu Engineer 7 years ago
parent
commit
7de498c3ab
100 changed files with 3191 additions and 444 deletions
  1. 9 0
      hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.cmd
  2. 9 0
      hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
  3. 30 5
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MapFile.java
  4. 4 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AuthorizationException.java
  5. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CpuTimeTracker.java
  6. 58 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestMapFile.java
  7. 14 38
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  8. 41 0
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/HdfsKMSUtil.java
  9. 3 0
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
  10. 87 14
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
  11. 2 0
      hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsContentLength.java
  12. 21 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  13. 85 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java
  14. 155 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHeartbeatService.java
  15. 36 7
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java
  16. 53 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafeModeException.java
  17. 150 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafemodeService.java
  18. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterServiceState.java
  19. 5 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreCacheUpdateService.java
  20. 9 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreService.java
  21. 3 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java
  22. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java
  23. 57 28
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java
  24. 54 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  25. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md
  26. 4 0
      hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSRouterFederation.md
  27. 17 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
  28. 218 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java
  29. 7 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingMultipleRacks.java
  30. 13 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/federation/RouterConfigBuilder.java
  31. 192 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterSafemode.java
  32. 194 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreRouterState.java
  33. 15 8
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java
  34. 0 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java
  35. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTokens.java
  36. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java
  37. 9 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
  38. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java
  39. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java
  40. 53 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptFailEvent.java
  41. 39 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskTAttemptFailedEvent.java
  42. 30 10
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
  43. 4 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
  44. 4 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
  45. 3 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java
  46. 4 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java
  47. 2 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java
  48. 4 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
  49. 22 20
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
  50. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java
  51. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java
  52. 95 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java
  53. 9 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
  54. 3 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java
  55. 14 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  56. 22 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
  57. 88 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestTaskProgressReporter.java
  58. 7 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CachedHistoryStorage.java
  59. 24 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
  60. 26 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java
  61. 5 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java
  62. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java
  63. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTaskCommit.java
  64. 11 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/pipes/TestPipeApplication.java
  65. 2 1
      hadoop-tools/hadoop-azure-datalake/pom.xml
  66. 34 41
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java
  67. 23 18
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java
  68. 55 19
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java
  69. 2 2
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java
  70. 2 9
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java
  71. 3 9
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java
  72. 1 1
      hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java
  73. 6 1
      hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java
  74. 4 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  75. 0 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java
  76. 3 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java
  77. 11 11
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java
  78. 0 7
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerKeys.java
  79. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java
  80. 11 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceApiUtil.java
  81. 3 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java
  82. 9 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java
  83. 33 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
  84. 20 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryPathUtils.java
  85. 4 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryUtils.java
  86. 2 20
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/BaseServiceRecordProcessor.java
  87. 9 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/test/java/org/apache/hadoop/registry/client/binding/TestRegistryPathUtils.java
  88. 4 7
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
  89. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
  90. 8 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java
  91. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java
  92. 29 23
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java
  93. 357 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsResourceCalculator.java
  94. 108 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CombinedResourceCalculator.java
  95. 33 10
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java
  96. 29 15
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
  97. 22 11
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
  98. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java
  99. 45 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java
  100. 274 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsResourceCalculator.java

+ 9 - 0
hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.cmd

@@ -70,6 +70,15 @@ set HADOOP_SECURE_DN_USER=%HADOOP_SECURE_DN_USER%
 @rem Where log files are stored in the secure data environment.
 set HADOOP_SECURE_DN_LOG_DIR=%HADOOP_LOG_DIR%\%HADOOP_HDFS_USER%
 
+@rem
+@rem Router-based HDFS Federation specific parameters
+@rem Specify the JVM options to be used when starting the RBF Routers.
+@rem These options will be appended to the options specified as HADOOP_OPTS
+@rem and therefore may override any similar flags set in HADOOP_OPTS
+@rem
+@rem set HADOOP_DFSROUTER_OPTS=""
+@rem
+
 @rem The directory where pid files are stored. /tmp by default.
 @rem NOTE: this should be set to a directory that can only be written to by 
 @rem       the user that will run the hadoop daemons.  Otherwise there is the

+ 9 - 0
hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh

@@ -395,6 +395,15 @@ esac
 #
 # export HDFS_MOVER_OPTS=""
 
+###
+# Router-based HDFS Federation specific parameters
+# Specify the JVM options to be used when starting the RBF Routers.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HDFS_DFSROUTER_OPTS=""
+
+
 ###
 # HDFS CBlock Server specific parameters
 ###

+ 30 - 5
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/MapFile.java

@@ -811,15 +811,40 @@ public class MapFile {
                                     (LongWritable.class));
     }
     try {
-      long pos = 0L;
+      /** What's the position (in bytes) we wrote when we got the last index */
+      long lastIndexPos = -1;
+      /**
+       * What was size when we last wrote an index. Set to MIN_VALUE to ensure
+       * that we have an index at position zero - midKey will throw an exception
+       * if this is not the case
+       */
+      long lastIndexKeyCount = Long.MIN_VALUE;
+      long pos = dataReader.getPosition();
       LongWritable position = new LongWritable();
+      long nextBlock = pos;
+      boolean blockCompressed = dataReader.isBlockCompressed();
       while(dataReader.next(key, value)) {
-        cnt++;
-        if (cnt % indexInterval == 0) {
+        if (blockCompressed) {
+          long curPos = dataReader.getPosition();
+          if (curPos > nextBlock) {
+            pos = nextBlock;                       // current block position
+            nextBlock = curPos;
+          }
+        }
+        // Follow the same logic as in
+        // {@link MapFile.Writer#append(WritableComparable, Writable)}
+        if (cnt >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) {
           position.set(pos);
-          if (!dryrun) indexWriter.append(key, position);
+          if (!dryrun) {
+            indexWriter.append(key, position);
+          }
+          lastIndexPos = pos;
+          lastIndexKeyCount = cnt;
+        }
+        if (!blockCompressed) {
+          pos = dataReader.getPosition();         // next record position
         }
-        pos = dataReader.getPosition();
+        cnt++;
       }
     } catch(Throwable t) {
       // truncated data file. swallow it.
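
The MapFile.fix() change above makes index rebuilding follow the same logic as MapFile.Writer#append: for BLOCK-compressed data files the recorded positions now track block boundaries, and index entries are throttled by both the index interval and the last indexed position. A minimal usage sketch for regenerating a lost index follows; the directory path is an assumption for illustration, and the key/value classes mirror the testFixBlockCompress case added later in this commit.

// A minimal sketch: rebuild the index of an existing MapFile directory.
// The path below is hypothetical; keyClass/valueClass follow the
// IntWritable/Text pairing used by testFixBlockCompress.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;

public class RebuildMapFileIndex {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path mapFileDir = new Path("/tmp/example.mapfile");  // hypothetical location
    // Scans the data file and rewrites the index; with dryrun=false the new
    // index file is actually written. Returns the number of valid entries.
    long entries = MapFile.fix(fs, mapFileDir, IntWritable.class, Text.class,
        false, conf);
    System.out.println("Rebuilt index over " + entries + " entries");
  }
}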

+ 4 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AuthorizationException.java

@@ -64,17 +64,19 @@ public class AuthorizationException extends AccessControlException {
 
   @Override
   public void printStackTrace() {
-    // Do not provide the stack-trace
+    printStackTrace(System.err);
   }
 
   @Override
   public void printStackTrace(PrintStream s) {
     // Do not provide the stack-trace
+    s.println(this);
   }
 
   @Override
   public void printStackTrace(PrintWriter s) {
     // Do not provide the stack-trace
+    s.println(this);
   }
-  
+
 }
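
With this change AuthorizationException still suppresses stack frames, but the stream overrides now print the exception summary line and the no-arg form delegates to System.err. A small, hedged illustration of the resulting output; the message text is made up.

import java.io.PrintWriter;
import java.io.StringWriter;
import org.apache.hadoop.security.authorize.AuthorizationException;

public class AuthorizationExceptionPrintExample {
  public static void main(String[] args) {
    AuthorizationException ex =
        new AuthorizationException("User alice is not authorized");  // made-up message
    StringWriter buffer = new StringWriter();
    ex.printStackTrace(new PrintWriter(buffer, true));
    // Per the override above, only the summary line is written, e.g.
    // org.apache.hadoop.security.authorize.AuthorizationException: User alice is not authorized
    System.out.print(buffer);
  }
}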

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CpuTimeTracker.java

@@ -99,7 +99,7 @@ public class CpuTimeTracker {
   public void updateElapsedJiffies(BigInteger elapsedJiffies, long newTime) {
     BigInteger newValue = elapsedJiffies.multiply(jiffyLengthInMillis);
     cumulativeCpuTime = newValue.compareTo(cumulativeCpuTime) >= 0 ?
-        newValue : cumulativeCpuTime;
+            newValue : cumulativeCpuTime;
     sampleTime = newTime;
   }
 

+ 58 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestMapFile.java

@@ -485,6 +485,63 @@ public class TestMapFile {
       IOUtils.cleanup(null, writer);
     }
   }
+
+  /**
+   * test {@link MapFile#fix(FileSystem, Path, Class<? extends Writable>,
+   *                         Class<? extends Writable>, boolean, Configuration)}
+   * method in case of BLOCK compression
+   */
+  @Test
+  public void testFixBlockCompress() throws Exception {
+    final String indexLessMapFile = "testFixBlockCompress.mapfile";
+    final int compressBlocksize = 100;
+    final int indexInterval = 4;
+    final int noBlocks = 4;
+    final String value = "value-";
+    final int size = noBlocks * compressBlocksize / (4 + value.length());
+
+    conf.setInt("io.seqfile.compress.blocksize", compressBlocksize);
+    MapFile.Writer.setIndexInterval(conf, indexInterval);
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path dir = new Path(TEST_DIR, indexLessMapFile);
+    MapFile.Writer writer = null;
+    MapFile.Reader reader = null;
+    try {
+      writer =
+          new MapFile.Writer(conf, dir,
+          MapFile.Writer.keyClass(IntWritable.class),
+          MapFile.Writer.valueClass(Text.class),
+          MapFile.Writer.compression(CompressionType.BLOCK));
+      for (int i = 0; i < size; i++) {
+        writer.append(new IntWritable(i), new Text(value + i));
+      }
+      writer.close();
+      Path index = new Path(dir, MapFile.INDEX_FILE_NAME);
+      fs.rename(index, index.suffix(".orig"));
+
+      assertEquals("No of valid MapFile entries wrong", size,
+                   MapFile.fix(fs, dir, IntWritable.class, Text.class,
+                               false, conf));
+      reader = new MapFile.Reader(dir, conf);
+      IntWritable key;
+      Text val = new Text();
+      int notFound = 0;
+      for (int i = 0; i < size; i++) {
+        key = new IntWritable(i);
+        if (null == reader.get(key, val)) {
+          notFound++;
+        }
+      }
+      assertEquals("With MapFile.fix-ed index, could not get entries # ",
+                   0, notFound);
+    } finally {
+      IOUtils.cleanupWithLogger(null, writer, reader);
+      if (fs.exists(dir)) {
+        fs.delete(dir, true);
+      }
+    }
+  }
+
   /**
    * test all available constructor for {@code MapFile.Writer}
    */
@@ -619,7 +676,7 @@ public class TestMapFile {
     } catch (Exception ex) {
       fail("testMainMethodMapFile error !!!");
     } finally {
-      IOUtils.cleanup(null, writer);
+      IOUtils.cleanupWithLogger(null, writer);
     }
   }
 

+ 14 - 38
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -38,7 +38,6 @@ import java.net.Socket;
 import java.net.SocketAddress;
 import java.net.URI;
 import java.net.UnknownHostException;
-import java.security.GeneralSecurityException;
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -62,8 +61,6 @@ import org.apache.hadoop.crypto.CryptoInputStream;
 import org.apache.hadoop.crypto.CryptoOutputStream;
 import org.apache.hadoop.crypto.key.KeyProvider;
 import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
-import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
-import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.CacheFlag;
 import org.apache.hadoop.fs.ContentSummary;
@@ -82,6 +79,7 @@ import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.fs.QuotaUsage;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.StorageType;
@@ -909,46 +907,19 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
     }
   }
 
-  /**
-   * Decrypts a EDEK by consulting the KeyProvider.
-   */
-  private KeyVersion decryptEncryptedDataEncryptionKey(FileEncryptionInfo
-      feInfo) throws IOException {
-    try (TraceScope ignored = tracer.newScope("decryptEDEK")) {
-      KeyProvider provider = getKeyProvider();
-      if (provider == null) {
-        throw new IOException("No KeyProvider is configured, cannot access" +
-            " an encrypted file");
-      }
-      EncryptedKeyVersion ekv = EncryptedKeyVersion.createForDecryption(
-          feInfo.getKeyName(), feInfo.getEzKeyVersionName(), feInfo.getIV(),
-          feInfo.getEncryptedDataEncryptionKey());
-      try {
-        KeyProviderCryptoExtension cryptoProvider = KeyProviderCryptoExtension
-            .createKeyProviderCryptoExtension(provider);
-        return cryptoProvider.decryptEncryptedKey(ekv);
-      } catch (GeneralSecurityException e) {
-        throw new IOException(e);
-      }
-    }
-  }
-
   /**
    * Wraps the stream in a CryptoInputStream if the underlying file is
    * encrypted.
    */
   public HdfsDataInputStream createWrappedInputStream(DFSInputStream dfsis)
       throws IOException {
-    final FileEncryptionInfo feInfo = dfsis.getFileEncryptionInfo();
+    FileEncryptionInfo feInfo = dfsis.getFileEncryptionInfo();
     if (feInfo != null) {
-      // File is encrypted, wrap the stream in a crypto stream.
-      // Currently only one version, so no special logic based on the version #
-      HdfsKMSUtil.getCryptoProtocolVersion(feInfo);
-      final CryptoCodec codec = HdfsKMSUtil.getCryptoCodec(conf, feInfo);
-      final KeyVersion decrypted = decryptEncryptedDataEncryptionKey(feInfo);
-      final CryptoInputStream cryptoIn =
-          new CryptoInputStream(dfsis, codec, decrypted.getMaterial(),
-              feInfo.getIV());
+      CryptoInputStream cryptoIn;
+      try (TraceScope ignored = getTracer().newScope("decryptEDEK")) {
+        cryptoIn = HdfsKMSUtil.createWrappedInputStream(dfsis,
+            getKeyProvider(), feInfo, getConfiguration());
+      }
       return new HdfsDataInputStream(cryptoIn);
     } else {
       // No FileEncryptionInfo so no encryption.
@@ -977,7 +948,11 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
       // Currently only one version, so no special logic based on the version #
       HdfsKMSUtil.getCryptoProtocolVersion(feInfo);
       final CryptoCodec codec = HdfsKMSUtil.getCryptoCodec(conf, feInfo);
-      KeyVersion decrypted = decryptEncryptedDataEncryptionKey(feInfo);
+      KeyVersion decrypted;
+      try (TraceScope ignored = tracer.newScope("decryptEDEK")) {
+        decrypted = HdfsKMSUtil.decryptEncryptedDataEncryptionKey(feInfo,
+          getKeyProvider());
+      }
       final CryptoOutputStream cryptoOut =
           new CryptoOutputStream(dfsos, codec,
               decrypted.getMaterial(), feInfo.getIV(), startPos);
@@ -1620,7 +1595,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
           FileNotFoundException.class,
           SafeModeException.class,
           UnresolvedPathException.class,
-          SnapshotAccessControlException.class);
+          SnapshotAccessControlException.class,
+          PathIsNotEmptyDirectoryException.class);
     }
   }
 

+ 41 - 0
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/HdfsKMSUtil.java

@@ -20,15 +20,21 @@ package org.apache.hadoop.hdfs;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_CRYPTO_CODEC_CLASSES_KEY_PREFIX;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.URI;
+import java.security.GeneralSecurityException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.CipherSuite;
 import org.apache.hadoop.crypto.CryptoCodec;
+import org.apache.hadoop.crypto.CryptoInputStream;
 import org.apache.hadoop.crypto.CryptoProtocolVersion;
 import org.apache.hadoop.crypto.key.KeyProvider;
+import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
+import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension;
+import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion;
 import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension;
 import org.apache.hadoop.crypto.key.KeyProviderTokenIssuer;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -187,4 +193,39 @@ public final class HdfsKMSUtil {
     return new Text(DFS_KMS_PREFIX + namenodeUri.getScheme()
         +"://" + namenodeUri.getAuthority());
   }
+
+  public static CryptoInputStream createWrappedInputStream(InputStream is,
+      KeyProvider keyProvider, FileEncryptionInfo fileEncryptionInfo,
+      Configuration conf) throws IOException {
+    // File is encrypted, wrap the stream in a crypto stream.
+    // Currently only one version, so no special logic based on the version#
+    HdfsKMSUtil.getCryptoProtocolVersion(fileEncryptionInfo);
+    final CryptoCodec codec = HdfsKMSUtil.getCryptoCodec(
+        conf, fileEncryptionInfo);
+    final KeyVersion decrypted =
+        decryptEncryptedDataEncryptionKey(fileEncryptionInfo, keyProvider);
+    return new CryptoInputStream(is, codec, decrypted.getMaterial(),
+        fileEncryptionInfo.getIV());
+  }
+
+  /**
+   * Decrypts a EDEK by consulting the KeyProvider.
+   */
+  static KeyVersion decryptEncryptedDataEncryptionKey(FileEncryptionInfo
+      feInfo, KeyProvider keyProvider) throws IOException {
+    if (keyProvider == null) {
+      throw new IOException("No KeyProvider is configured, cannot access" +
+          " an encrypted file");
+    }
+    EncryptedKeyVersion ekv = EncryptedKeyVersion.createForDecryption(
+        feInfo.getKeyName(), feInfo.getEzKeyVersionName(), feInfo.getIV(),
+        feInfo.getEncryptedDataEncryptionKey());
+    try {
+      KeyProviderCryptoExtension cryptoProvider = KeyProviderCryptoExtension
+          .createKeyProviderCryptoExtension(keyProvider);
+      return cryptoProvider.decryptEncryptedKey(ekv);
+    } catch (GeneralSecurityException e) {
+      throw new IOException(e);
+    }
+  }
 }

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.crypto.CryptoProtocolVersion;
 import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedEntries;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.hdfs.AddBlockFlag;
 import org.apache.hadoop.fs.CacheFlag;
 import org.apache.hadoop.fs.ContentSummary;
@@ -625,6 +626,8 @@ public interface ClientProtocol {
    * @throws org.apache.hadoop.fs.UnresolvedLinkException If <code>src</code>
    *           contains a symlink
    * @throws SnapshotAccessControlException if path is in RO snapshot
+   * @throws PathIsNotEmptyDirectoryException if path is a non-empty directory
+   *           and <code>recursive</code> is set to false
    * @throws IOException If an I/O error occurred
    */
   @AtMostOnce
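
This javadoc records behavior the DFSClient change above now exposes to callers: deleting a non-empty directory with recursive=false surfaces PathIsNotEmptyDirectoryException. A small sketch of handling it on the client side, assuming a FileSystem that unwraps the remote exception as DFSClient now does; the handling code and path are illustrative.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;

public class NonRecursiveDeleteExample {
  static void deleteIfEmpty(FileSystem fs, Path dir) throws IOException {
    try {
      fs.delete(dir, false /* recursive */);
    } catch (PathIsNotEmptyDirectoryException e) {
      // Raised when the directory still has children and recursive=false.
      System.err.println("Not deleting non-empty directory: " + dir);
    }
  }
}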

+ 87 - 14
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -37,8 +37,11 @@ import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
+import java.util.Base64;
+import java.util.Base64.Decoder;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashSet;
@@ -66,6 +69,7 @@ import org.apache.hadoop.fs.DelegationTokenRenewer;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSInputStream;
+import org.apache.hadoop.fs.FileEncryptionInfo;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsServerDefaults;
@@ -92,6 +96,8 @@ import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.FileEncryptionInfoProto;
+import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.web.resources.*;
 import org.apache.hadoop.hdfs.web.resources.HttpOpParam.Op;
@@ -133,6 +139,8 @@ public class WebHdfsFileSystem extends FileSystem
   /** Http URI: http://namenode:port/{PATH_PREFIX}/path/to/file */
   public static final String PATH_PREFIX = "/" + WebHdfsConstants.WEBHDFS_SCHEME
       + "/v" + VERSION;
+  public static final String EZ_HEADER = "X-Hadoop-Accept-EZ";
+  public static final String FEFINFO_HEADER = "X-Hadoop-feInfo";
 
   /**
    * Default connection factory may be overridden in tests to use smaller
@@ -613,12 +621,19 @@ public class WebHdfsFileSystem extends FileSystem
 
     private boolean checkRetry;
     private String redirectHost;
+    private boolean followRedirect = true;
 
     protected AbstractRunner(final HttpOpParam.Op op, boolean redirected) {
       this.op = op;
       this.redirected = redirected;
     }
 
+    protected AbstractRunner(final HttpOpParam.Op op, boolean redirected,
+        boolean followRedirect) {
+      this(op, redirected);
+      this.followRedirect = followRedirect;
+    }
+
     T run() throws IOException {
       UserGroupInformation connectUgi = ugi.getRealUser();
       if (connectUgi == null) {
@@ -685,9 +700,17 @@ public class WebHdfsFileSystem extends FileSystem
           // See http://tinyurl.com/java7-http-keepalive
           conn.disconnect();
         }
+        if (!followRedirect) {
+          return conn;
+        }
       }
       try {
-        return connect(op, url);
+        final HttpURLConnection conn = connect(op, url);
+        // output streams will validate on close
+        if (!op.getDoOutput()) {
+          validateResponse(op, conn, false);
+        }
+        return conn;
       } catch (IOException ioe) {
         if (redirectHost != null) {
           if (excludeDatanodes.getValue() != null) {
@@ -713,6 +736,7 @@ public class WebHdfsFileSystem extends FileSystem
         // The value of the header is unimportant.  Only its presence matters.
         conn.setRequestProperty(restCsrfCustomHeader, "\"\"");
       }
+      conn.setRequestProperty(EZ_HEADER, "true");
       switch (op.getType()) {
       // if not sending a message body for a POST or PUT operation, need
       // to ensure the server/proxy knows this
@@ -760,10 +784,6 @@ public class WebHdfsFileSystem extends FileSystem
         final URL url = getUrl();
         try {
           final HttpURLConnection conn = connect(url);
-          // output streams will validate on close
-          if (!op.getDoOutput()) {
-            validateResponse(op, conn, false);
-          }
           return getResponse(conn);
         } catch (AccessControlException ace) {
           // no retries for auth failures
@@ -809,7 +829,6 @@ public class WebHdfsFileSystem extends FileSystem
               a.action == RetryPolicy.RetryAction.RetryDecision.RETRY;
           boolean isFailoverAndRetry =
               a.action == RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY;
-
           if (isRetry || isFailoverAndRetry) {
             LOG.info("Retrying connect to namenode: {}. Already retried {}"
                     + " time(s); retry policy is {}, delay {}ms.",
@@ -990,16 +1009,16 @@ public class WebHdfsFileSystem extends FileSystem
   /**
    * Used by open() which tracks the resolved url itself
    */
-  final class URLRunner extends AbstractRunner<HttpURLConnection> {
+  class URLRunner extends AbstractRunner<HttpURLConnection> {
     private final URL url;
     @Override
-    protected URL getUrl() {
+    protected URL getUrl() throws IOException {
       return url;
     }
 
     protected URLRunner(final HttpOpParam.Op op, final URL url,
-        boolean redirected) {
-      super(op, redirected);
+        boolean redirected, boolean followRedirect) {
+      super(op, redirected, followRedirect);
       this.url = url;
     }
 
@@ -1412,12 +1431,20 @@ public class WebHdfsFileSystem extends FileSystem
     ).run();
   }
 
+  @SuppressWarnings("resource")
   @Override
   public FSDataInputStream open(final Path f, final int bufferSize
   ) throws IOException {
     statistics.incrementReadOps(1);
     storageStatistics.incrementOpCounter(OpType.OPEN);
-    return new FSDataInputStream(new WebHdfsInputStream(f, bufferSize));
+    WebHdfsInputStream webfsInputStream =
+        new WebHdfsInputStream(f, bufferSize);
+    if (webfsInputStream.getFileEncryptionInfo() == null) {
+      return new FSDataInputStream(webfsInputStream);
+    } else {
+      return new FSDataInputStream(
+          webfsInputStream.createWrappedInputStream());
+    }
   }
 
   @Override
@@ -1462,7 +1489,8 @@ public class WebHdfsFileSystem extends FileSystem
         final boolean resolved) throws IOException {
       final URL offsetUrl = offset == 0L? url
           : new URL(url + "&" + new OffsetParam(offset));
-      return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run();
+      return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved,
+          true).run();
     }
   }
 
@@ -1928,6 +1956,15 @@ public class WebHdfsFileSystem extends FileSystem
     void setReadRunner(ReadRunner rr) {
       this.readRunner = rr;
     }
+
+    FileEncryptionInfo getFileEncryptionInfo() {
+      return readRunner.getFileEncryptionInfo();
+    }
+
+    InputStream createWrappedInputStream() throws IOException {
+      return HdfsKMSUtil.createWrappedInputStream(
+          this, getKeyProvider(), getFileEncryptionInfo(), getConf());
+    }
   }
 
   enum RunnerState {
@@ -1964,7 +2001,7 @@ public class WebHdfsFileSystem extends FileSystem
     private byte[] readBuffer;
     private int readOffset;
     private int readLength;
-    private RunnerState runnerState = RunnerState.DISCONNECTED;
+    private RunnerState runnerState = RunnerState.SEEK;
     private URL originalUrl = null;
     private URL resolvedUrl = null;
 
@@ -1972,6 +2009,7 @@ public class WebHdfsFileSystem extends FileSystem
     private final int bufferSize;
     private long pos = 0;
     private long fileLength = 0;
+    private FileEncryptionInfo feInfo = null;
 
     /* The following methods are WebHdfsInputStream helpers. */
 
@@ -1979,6 +2017,36 @@ public class WebHdfsFileSystem extends FileSystem
       super(GetOpParam.Op.OPEN, p, new BufferSizeParam(bs));
       this.path = p;
       this.bufferSize = bs;
+      getRedirectedUrl();
+    }
+
+    private void getRedirectedUrl() throws IOException {
+      URLRunner urlRunner = new URLRunner(GetOpParam.Op.OPEN, null, false,
+          false) {
+        @Override
+        protected URL getUrl() throws IOException {
+          return toUrl(op, path, new BufferSizeParam(bufferSize));
+        }
+      };
+      HttpURLConnection conn = urlRunner.run();
+      String feInfoStr = conn.getHeaderField(FEFINFO_HEADER);
+      if (feInfoStr != null) {
+        Decoder decoder = Base64.getDecoder();
+        byte[] decodedBytes = decoder.decode(
+            feInfoStr.getBytes(StandardCharsets.UTF_8));
+        feInfo = PBHelperClient
+            .convert(FileEncryptionInfoProto.parseFrom(decodedBytes));
+      }
+      String location = conn.getHeaderField("Location");
+      if (location != null) {
+        // This saves the location for datanode where redirect was issued.
+        // Need to remove offset because seek can be called after open.
+        resolvedUrl = removeOffsetParam(new URL(location));
+      } else {
+        // This is cached for proxies like httpfsfilesystem.
+        cachedConnection = conn;
+      }
+      originalUrl = super.getUrl();
     }
 
     int read(byte[] b, int off, int len) throws IOException {
@@ -2011,7 +2079,8 @@ public class WebHdfsFileSystem extends FileSystem
       if (runnerState == RunnerState.SEEK) {
         try {
           final URL rurl = new URL(resolvedUrl + "&" + new OffsetParam(pos));
-          cachedConnection = new URLRunner(GetOpParam.Op.OPEN, rurl, true).run();
+          cachedConnection = new URLRunner(GetOpParam.Op.OPEN, rurl, true,
+              false).run();
         } catch (IOException ioe) {
           closeInputStream(RunnerState.DISCONNECTED);
         }
@@ -2195,5 +2264,9 @@ public class WebHdfsFileSystem extends FileSystem
     long getPos() {
       return pos;
     }
+
+    protected FileEncryptionInfo getFileEncryptionInfo() {
+      return feInfo;
+    }
   }
 }
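
The WebHdfsFileSystem changes above fetch the file's FileEncryptionInfo through the new X-Hadoop-feInfo header and wrap the stream via HdfsKMSUtil, so reads of files in an encryption zone are decrypted client-side when a KeyProvider is configured. A hedged sketch of the user-visible effect; the namenode address, port and path are illustrative assumptions.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WebHdfsEncryptedReadExample {
  public static void main(String[] args) throws Exception {
    // Assumes a KMS key provider is configured for the client.
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create("webhdfs://namenode:9870"), conf);
    try (FSDataInputStream in = fs.open(new Path("/zones/zone1/file.txt"))) {
      byte[] buf = new byte[4096];
      int read = in.read(buf);
      // With this commit, the bytes arrive decrypted rather than as ciphertext.
      System.out.println("Read " + read + " plaintext bytes");
    }
  }
}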

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsContentLength.java

@@ -102,12 +102,14 @@ public class TestWebHdfsContentLength {
   public void testGetOpWithRedirect() {
     Future<String> future1 = contentLengthFuture(redirectResponse);
     Future<String> future2 = contentLengthFuture(errResponse);
+    Future<String> future3 = contentLengthFuture(errResponse);
     try {
       fs.open(p).read();
       Assert.fail();
     } catch (IOException ioe) {} // expected
     Assert.assertEquals(null, getContentLength(future1));
     Assert.assertEquals(null, getContentLength(future2));
+    Assert.assertEquals(null, getContentLength(future3));
   }
   
   @Test

+ 21 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -1222,6 +1222,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_ROUTER_MONITOR_LOCAL_NAMENODE =
       FEDERATION_ROUTER_PREFIX + "monitor.localnamenode.enable";
   public static final boolean DFS_ROUTER_MONITOR_LOCAL_NAMENODE_DEFAULT = true;
+  public static final String DFS_ROUTER_HEARTBEAT_STATE_INTERVAL_MS =
+      FEDERATION_ROUTER_PREFIX + "heartbeat-state.interval";
+  public static final long DFS_ROUTER_HEARTBEAT_STATE_INTERVAL_MS_DEFAULT =
+      TimeUnit.SECONDS.toMillis(5);
 
   // HDFS Router NN client
   public static final String DFS_ROUTER_NAMENODE_CONNECTION_POOL_SIZE =
@@ -1282,6 +1286,23 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       FEDERATION_STORE_PREFIX + "membership.expiration";
   public static final long FEDERATION_STORE_MEMBERSHIP_EXPIRATION_MS_DEFAULT =
       TimeUnit.MINUTES.toMillis(5);
+  public static final String FEDERATION_STORE_ROUTER_EXPIRATION_MS =
+      FEDERATION_STORE_PREFIX + "router.expiration";
+  public static final long FEDERATION_STORE_ROUTER_EXPIRATION_MS_DEFAULT =
+      TimeUnit.MINUTES.toMillis(5);
+
+  // HDFS Router safe mode
+  public static final String DFS_ROUTER_SAFEMODE_ENABLE =
+      FEDERATION_ROUTER_PREFIX + "safemode.enable";
+  public static final boolean DFS_ROUTER_SAFEMODE_ENABLE_DEFAULT = true;
+  public static final String DFS_ROUTER_SAFEMODE_EXTENSION =
+      FEDERATION_ROUTER_PREFIX + "safemode.extension";
+  public static final long DFS_ROUTER_SAFEMODE_EXTENSION_DEFAULT =
+      TimeUnit.SECONDS.toMillis(30);
+  public static final String DFS_ROUTER_SAFEMODE_EXPIRATION =
+      FEDERATION_ROUTER_PREFIX + "safemode.expiration";
+  public static final long DFS_ROUTER_SAFEMODE_EXPIRATION_DEFAULT =
+      3 * DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT;
 
   // HDFS Router-based federation mount table entries
   /** Maximum number of cache entries to have. */
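
The new keys govern the Router's state heartbeat, the expiration of Router records in the State Store, and the Router safe mode introduced in this commit. A hedged sketch of setting them programmatically through the DFSConfigKeys constants; the values are illustrative, and the sketch assumes the keys are read as millisecond durations, as their defaults above suggest.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class RouterSafemodeConfSketch {
  static Configuration buildRouterConf() {
    Configuration conf = new Configuration();
    // Keep the safe mode gate enabled (the default introduced here).
    conf.setBoolean(DFSConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE, true);
    // Stay in safe mode for 10 seconds after startup (value in milliseconds).
    conf.setLong(DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION, 10_000L);
    // Heartbeat the Router state to the State Store every 2 seconds.
    conf.setLong(DFSConfigKeys.DFS_ROUTER_HEARTBEAT_STATE_INTERVAL_MS, 2_000L);
    return conf;
  }
}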

+ 85 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/Router.java

@@ -37,11 +37,13 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.server.federation.metrics.FederationMetrics;
 import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver;
 import org.apache.hadoop.hdfs.server.federation.resolver.FileSubclusterResolver;
+import org.apache.hadoop.hdfs.server.federation.store.RouterStore;
 import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.source.JvmMetrics;
 import org.apache.hadoop.service.CompositeService;
 import org.apache.hadoop.util.JvmPauseMonitor;
+import org.apache.hadoop.util.Time;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -112,6 +114,20 @@ public class Router extends CompositeService {
   /** Quota cache manager. */
   private RouterQuotaManager quotaManager;
 
+  /** Manages the current state of the router. */
+  private RouterStore routerStateManager;
+  /** Heartbeat our run status to the router state manager. */
+  private RouterHeartbeatService routerHeartbeatService;
+  /** Enter/exit safemode. */
+  private RouterSafemodeService safemodeService;
+
+  /** The start time of the namesystem. */
+  private final long startTime = Time.now();
+
+  /** State of the Router. */
+  private RouterServiceState state = RouterServiceState.UNINITIALIZED;
+
+
   /////////////////////////////////////////////////////////
   // Constructor
   /////////////////////////////////////////////////////////
@@ -127,6 +143,7 @@ public class Router extends CompositeService {
   @Override
   protected void serviceInit(Configuration configuration) throws Exception {
     this.conf = configuration;
+    updateRouterState(RouterServiceState.INITIALIZING);
 
     if (conf.getBoolean(
         DFSConfigKeys.DFS_ROUTER_STORE_ENABLE,
@@ -188,6 +205,10 @@ public class Router extends CompositeService {
       if (this.namenodeHearbeatServices.isEmpty()) {
         LOG.error("Heartbeat is enabled but there are no namenodes to monitor");
       }
+
+      // Periodically update the router state
+      this.routerHeartbeatService = new RouterHeartbeatService(this);
+      addService(this.routerHeartbeatService);
     }
 
     // Router metrics system
@@ -213,12 +234,26 @@ public class Router extends CompositeService {
       addService(this.quotaUpdateService);
     }
 
+    // Safemode service to refuse RPC calls when the router is out of sync
+    if (conf.getBoolean(
+        DFSConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE,
+        DFSConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE_DEFAULT)) {
+      // Create safemode monitoring service
+      this.safemodeService = new RouterSafemodeService(this);
+      addService(this.safemodeService);
+    }
+
     super.serviceInit(conf);
   }
 
   @Override
   protected void serviceStart() throws Exception {
 
+    if (this.safemodeService == null) {
+      // Router is running now
+      updateRouterState(RouterServiceState.RUNNING);
+    }
+
     if (this.pauseMonitor != null) {
       this.pauseMonitor.start();
       JvmMetrics jvmMetrics = this.metrics.getJvmMetrics();
@@ -233,6 +268,9 @@ public class Router extends CompositeService {
   @Override
   protected void serviceStop() throws Exception {
 
+    // Update state
+    updateRouterState(RouterServiceState.SHUTDOWN);
+
     // JVM pause monitor
     if (this.pauseMonitor != null) {
       this.pauseMonitor.stop();
@@ -453,6 +491,31 @@ public class Router extends CompositeService {
     return ret;
   }
 
+  /////////////////////////////////////////////////////////
+  // Router State Management
+  /////////////////////////////////////////////////////////
+
+  /**
+   * Update the router state and heartbeat to the state store.
+   *
+   * @param state The new router state.
+   */
+  public void updateRouterState(RouterServiceState newState) {
+    this.state = newState;
+    if (this.routerHeartbeatService != null) {
+      this.routerHeartbeatService.updateStateAsync();
+    }
+  }
+
+  /**
+   * Get the status of the router.
+   *
+   * @return Status of the router.
+   */
+  public RouterServiceState getRouterState() {
+    return this.state;
+  }
+
   /////////////////////////////////////////////////////////
   // Submodule getters
   /////////////////////////////////////////////////////////
@@ -508,10 +571,32 @@ public class Router extends CompositeService {
     return this.namenodeResolver;
   }
 
+  /**
+   * Get the state store interface for the router heartbeats.
+   *
+   * @return FederationRouterStateStore state store API handle.
+   */
+  public RouterStore getRouterStateManager() {
+    if (this.routerStateManager == null && this.stateStore != null) {
+      this.routerStateManager = this.stateStore.getRegisteredRecordStore(
+          RouterStore.class);
+    }
+    return this.routerStateManager;
+  }
+
   /////////////////////////////////////////////////////////
   // Router info
   /////////////////////////////////////////////////////////
 
+  /**
+   * Get the start date of the Router.
+   *
+   * @return Start date of the router.
+   */
+  public long getStartTime() {
+    return this.startTime;
+  }
+
   /**
    * Unique ID for the router, typically the hostname:port string for the
    * router's RPC server. This ID may be null on router startup before the RPC

+ 155 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHeartbeatService.java

@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.server.federation.store.CachedRecordStore;
+import org.apache.hadoop.hdfs.server.federation.store.MembershipStore;
+import org.apache.hadoop.hdfs.server.federation.store.MountTableStore;
+import org.apache.hadoop.hdfs.server.federation.store.RecordStore;
+import org.apache.hadoop.hdfs.server.federation.store.RouterStore;
+import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
+import org.apache.hadoop.hdfs.server.federation.store.protocol.RouterHeartbeatRequest;
+import org.apache.hadoop.hdfs.server.federation.store.protocol.RouterHeartbeatResponse;
+import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
+import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
+import org.apache.hadoop.hdfs.server.federation.store.records.StateStoreVersion;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Service to periodically update the Router current state in the State Store.
+ */
+public class RouterHeartbeatService extends PeriodicService {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(RouterHeartbeatService.class);
+
+  /** Router we are hearbeating. */
+  private final Router router;
+
+  /**
+   * Create a new Router heartbeat service.
+   *
+   * @param router Router to heartbeat.
+   */
+  public RouterHeartbeatService(Router router) {
+    super(RouterHeartbeatService.class.getSimpleName());
+    this.router = router;
+  }
+
+  /**
+   * Trigger the update of the Router state asynchronously.
+   */
+  protected void updateStateAsync() {
+    Thread thread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        updateStateStore();
+      }
+    }, "Router Heartbeat Async");
+    thread.setDaemon(true);
+    thread.start();
+  }
+
+  /**
+   * Update the state of the Router in the State Store.
+   */
+  private synchronized void updateStateStore() {
+    String routerId = router.getRouterId();
+    if (routerId == null) {
+      LOG.error("Cannot heartbeat for router: unknown router id");
+      return;
+    }
+    RouterStore routerStore = router.getRouterStateManager();
+    if (routerStore != null) {
+      try {
+        RouterState record = RouterState.newInstance(
+            routerId, router.getStartTime(), router.getRouterState());
+        StateStoreVersion stateStoreVersion = StateStoreVersion.newInstance(
+            getStateStoreVersion(MembershipStore.class),
+            getStateStoreVersion(MountTableStore.class));
+        record.setStateStoreVersion(stateStoreVersion);
+        RouterHeartbeatRequest request =
+            RouterHeartbeatRequest.newInstance(record);
+        RouterHeartbeatResponse response = routerStore.routerHeartbeat(request);
+        if (!response.getStatus()) {
+          LOG.warn("Cannot heartbeat router {}", routerId);
+        } else {
+          LOG.debug("Router heartbeat for router {}", routerId);
+        }
+      } catch (IOException e) {
+        LOG.error("Cannot heartbeat router {}: {}", routerId, e.getMessage());
+      }
+    } else {
+      LOG.warn("Cannot heartbeat router {}: State Store unavailable", routerId);
+    }
+  }
+
+  /**
+   * Get the version of the data in the State Store.
+   *
+   * @param clazz Class in the State Store.
+   * @return Version of the data.
+   */
+  private <R extends BaseRecord, S extends RecordStore<R>>
+      long getStateStoreVersion(final Class<S> clazz) {
+    long version = -1;
+    try {
+      StateStoreService stateStore = router.getStateStore();
+      S recordStore = stateStore.getRegisteredRecordStore(clazz);
+      if (recordStore != null) {
+        if (recordStore instanceof CachedRecordStore) {
+          CachedRecordStore<R> cachedRecordStore =
+              (CachedRecordStore<R>) recordStore;
+          List<R> records = cachedRecordStore.getCachedRecords();
+          for (BaseRecord record : records) {
+            if (record.getDateModified() > version) {
+              version = record.getDateModified();
+            }
+          }
+        }
+      }
+    } catch (Exception e) {
+      LOG.error("Cannot get version for {}: {}", clazz, e.getMessage());
+    }
+    return version;
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+
+    long interval = conf.getTimeDuration(
+        DFSConfigKeys.DFS_ROUTER_HEARTBEAT_STATE_INTERVAL_MS,
+        DFSConfigKeys.DFS_ROUTER_HEARTBEAT_STATE_INTERVAL_MS_DEFAULT,
+        TimeUnit.MILLISECONDS);
+    this.setIntervalMs(interval);
+
+    super.serviceInit(conf);
+  }
+
+  @Override
+  public void periodicInvoke() {
+    updateStateStore();
+  }
+}

+ 36 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java

@@ -179,6 +179,8 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
   /** Interface to map global name space to HDFS subcluster name spaces. */
   private final FileSubclusterResolver subclusterResolver;
 
+  /** If we are in safe mode, fail requests as if a standby NN. */
+  private volatile boolean safeMode;
 
   /** Category of the operation that a thread is executing. */
   private final ThreadLocal<OperationCategory> opCategory = new ThreadLocal<>();
@@ -370,12 +372,12 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
    * @param op Category of the operation to check.
    * @param supported If the operation is supported or not. If not, it will
    *                  throw an UnsupportedOperationException.
-   * @throws StandbyException If the Router is in safe mode and cannot serve
-   *                          client requests.
+   * @throws SafeModeException If the Router is in safe mode and cannot serve
+   *                           client requests.
    * @throws UnsupportedOperationException If the operation is not supported.
    */
   protected void checkOperation(OperationCategory op, boolean supported)
-      throws StandbyException, UnsupportedOperationException {
+      throws RouterSafeModeException, UnsupportedOperationException {
     checkOperation(op);
 
     if (!supported) {
@@ -393,10 +395,11 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
    * UNCHECKED. This function should be called by all ClientProtocol functions.
    *
    * @param op Category of the operation to check.
-   * @throws StandbyException If the Router is in safe mode and cannot serve
-   *                          client requests.
+   * @throws SafeModeException If the Router is in safe mode and cannot serve
+   *                           client requests.
    */
-  protected void checkOperation(OperationCategory op) throws StandbyException {
+  protected void checkOperation(OperationCategory op)
+      throws RouterSafeModeException {
     // Log the function we are currently calling.
     if (rpcMonitor != null) {
       rpcMonitor.startOp();
@@ -415,7 +418,33 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
       return;
     }
 
-    // TODO check Router safe mode and return Standby exception
+    if (safeMode) {
+      // Reject the call with a safe mode exception (a StandbyException subclass)
+      if (rpcMonitor != null) {
+        rpcMonitor.routerFailureSafemode();
+      }
+      throw new RouterSafeModeException(router.getRouterId(), op);
+    }
+  }
+
+  /**
+   * In safe mode all RPC requests will fail and return a standby exception.
+   * The client will try another Router, similar to the client retry logic for
+   * HA.
+   *
+   * @param mode True if enabled, False if disabled.
+   */
+  public void setSafeMode(boolean mode) {
+    this.safeMode = mode;
+  }
+
+  /**
+   * Check if the Router is in safe mode and cannot serve RPC calls.
+   *
+   * @return If the Router is in safe mode.
+   */
+  public boolean isInSafeMode() {
+    return this.safeMode;
   }
 
   @Override // ClientProtocol
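
To see how the new safeMode flag reaches clients: every ClientProtocol method in RouterRpcServer is expected to call checkOperation() before doing any work, so while the flag is set the call is rejected with a RouterSafeModeException (a StandbyException) and the HA client retries against another Router. A hedged sketch of that calling pattern; the delete() body below is illustrative, not the implementation in this commit:

    // Sketch only: how an RPC handler in RouterRpcServer is guarded.
    @Override // ClientProtocol
    public boolean delete(String src, boolean recursive) throws IOException {
      // Throws RouterSafeModeException while the Router is in safe mode,
      // which the client treats like a Standby NameNode and fails over.
      checkOperation(OperationCategory.WRITE, true);

      // Normal path: resolve the mount point and forward to the subcluster.
      // ...
      return true; // placeholder result for the sketch
    }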

+ 53 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafeModeException.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
+import org.apache.hadoop.ipc.StandbyException;
+
+/**
+ * Exception that the Router throws when it is in safe mode. This extends
+ * {@link StandbyException} for the client to try another Router when it gets
+ * this exception.
+ */
+public class RouterSafeModeException extends StandbyException {
+
+  private static final long serialVersionUID = 453568188334993493L;
+
+  /** Identifier of the Router that generated this exception. */
+  private final String routerId;
+
+  /**
+   * Build a new Router safe mode exception.
+   * @param router Identifier of the Router.
+   * @param op Category of the operation (READ/WRITE).
+   */
+  public RouterSafeModeException(String router, OperationCategory op) {
+    super("Router " + router + " is in safe mode and cannot handle " + op
+        + " requests.");
+    this.routerId = router;
+  }
+
+  /**
+   * Get the id of the Router that generated this exception.
+   * @return Id of the Router that generated this exception.
+   */
+  public String getRouterId() {
+    return this.routerId;
+  }
+}

+ 150 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterSafemodeService.java

@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import static org.apache.hadoop.util.Time.now;
+
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
+import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Service to periodically check if the {@link org.apache.hadoop.hdfs.server.
+ * federation.store.StateStoreService StateStoreService} cached information in
+ * the {@link Router} is up to date. The Router enters safe mode at startup and
+ * whenever the cache becomes stale, and leaves safe mode once the cache has
+ * been refreshed, so client requests are not served from stale information.
+ */
+public class RouterSafemodeService extends PeriodicService {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(RouterSafemodeService.class);
+
+  /** Router to manage safe mode. */
+  private final Router router;
+
+  /** Interval in ms to wait post startup before allowing RPC requests. */
+  private long startupInterval;
+  /** Interval in ms after which the State Store cache is too stale. */
+  private long staleInterval;
+  /** Start time in ms of this service. */
+  private long startupTime;
+
+  /** The time the Router enters safe mode in milliseconds. */
+  private long enterSafeModeTime = now();
+
+
+  /**
+   * Create a new safe mode service.
+   *
+   * @param router Router to manage safe mode for.
+   */
+  public RouterSafemodeService(Router router) {
+    super(RouterSafemodeService.class.getSimpleName());
+    this.router = router;
+  }
+
+  /**
+   * Enter safe mode.
+   */
+  private void enter() {
+    LOG.info("Entering safe mode");
+    enterSafeModeTime = now();
+    RouterRpcServer rpcServer = router.getRpcServer();
+    rpcServer.setSafeMode(true);
+    router.updateRouterState(RouterServiceState.SAFEMODE);
+  }
+
+  /**
+   * Leave safe mode.
+   */
+  private void leave() {
+    // Cache recently updated, leave safemode
+    long timeInSafemode = now() - enterSafeModeTime;
+    LOG.info("Leaving safe mode after {} milliseconds", timeInSafemode);
+    RouterMetrics routerMetrics = router.getRouterMetrics();
+    if (routerMetrics == null) {
+      LOG.error("The Router metrics are not enabled");
+    } else {
+      routerMetrics.setSafeModeTime(timeInSafemode);
+    }
+    RouterRpcServer rpcServer = router.getRpcServer();
+    rpcServer.setSafeMode(false);
+    router.updateRouterState(RouterServiceState.RUNNING);
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+
+    // Use same interval as cache update service
+    this.setIntervalMs(conf.getTimeDuration(
+        DFSConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
+        DFSConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT,
+        TimeUnit.MILLISECONDS));
+
+    this.startupInterval = conf.getTimeDuration(
+        DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION,
+        DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION_DEFAULT,
+        TimeUnit.MILLISECONDS);
+    LOG.info("Leave startup safe mode after {} ms", this.startupInterval);
+
+    this.staleInterval = conf.getTimeDuration(
+        DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXPIRATION,
+        DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXPIRATION_DEFAULT,
+        TimeUnit.MILLISECONDS);
+    LOG.info("Enter safe mode after {} ms without reaching the State Store",
+        this.staleInterval);
+
+    this.startupTime = Time.now();
+
+    // Start in safe mode; periodicInvoke() will leave it once the cache is fresh
+    enter();
+
+    super.serviceInit(conf);
+  }
+
+  @Override
+  public void periodicInvoke() {
+    long now = Time.now();
+    long delta = now - startupTime;
+    if (delta < startupInterval) {
+      LOG.info("Delaying safemode exit for {} milliseconds...",
+          this.startupInterval - delta);
+      return;
+    }
+    RouterRpcServer rpcServer = router.getRpcServer();
+    StateStoreService stateStore = router.getStateStore();
+    long cacheUpdateTime = stateStore.getCacheUpdateTime();
+    boolean isCacheStale = (now - cacheUpdateTime) > this.staleInterval;
+
+    // Enter safe mode if the State Store cache is stale; leave once it is fresh
+    if (isCacheStale) {
+      if (!rpcServer.isInSafeMode()) {
+        enter();
+      }
+    } else if (rpcServer.isInSafeMode()) {
+      // Cache recently updated, leave safe mode
+      leave();
+    }
+  }
+}
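
The safe mode service is only attached when it is enabled in the configuration. A sketch of the wiring inside the Router, assuming the Router is a CompositeService and using the DFS_ROUTER_SAFEMODE_ENABLE key set by RouterConfigBuilder further down in this commit (the surrounding Router.serviceInit() code is not shown in this excerpt):

    // Illustrative wiring, e.g. inside Router#serviceInit(Configuration conf):
    if (conf.getBoolean(DFSConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE, true)) {
      // Default is true per dfs.federation.router.safemode.enable below.
      // The Router starts in SAFEMODE and moves to RUNNING once the
      // State Store cache has been refreshed.
      RouterSafemodeService safemodeService = new RouterSafemodeService(this);
      addService(safemodeService);
    }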

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterServiceState.java

@@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.federation.router;
  * States of the Router.
  */
 public enum RouterServiceState {
-  NONE,
+  UNINITIALIZED,
   INITIALIZING,
   SAFEMODE,
   RUNNING,

+ 5 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreCacheUpdateService.java

@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.federation.store;
 
+import java.util.concurrent.TimeUnit;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.server.federation.router.PeriodicService;
@@ -52,9 +54,10 @@ public class StateStoreCacheUpdateService extends PeriodicService {
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
 
-    this.setIntervalMs(conf.getLong(
+    this.setIntervalMs(conf.getTimeDuration(
         DFSConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
-        DFSConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT));
+        DFSConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT,
+        TimeUnit.MILLISECONDS));
 
     super.serviceInit(conf);
   }
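
The switch from getLong() to getTimeDuration() is what allows the suffixed values (1m, 5s, ...) introduced in hdfs-default.xml later in this commit: getTimeDuration() parses an optional time-unit suffix and falls back to the supplied unit for bare numbers, so existing millisecond-only values keep working. A small standalone illustration (the key reuse is for demonstration only):

    import java.util.concurrent.TimeUnit;

    import org.apache.hadoop.conf.Configuration;

    public class TimeDurationExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);

        // A suffixed value is converted into the requested unit.
        conf.set("dfs.federation.router.cache.ttl", "1m");
        System.out.println(conf.getTimeDuration(
            "dfs.federation.router.cache.ttl", 60000,
            TimeUnit.MILLISECONDS)); // 60000

        // A bare number is interpreted in the unit passed to the getter,
        // which keeps old millisecond-only configurations valid.
        conf.set("dfs.federation.router.cache.ttl", "60000");
        System.out.println(conf.getTimeDuration(
            "dfs.federation.router.cache.ttl", 60000,
            TimeUnit.MILLISECONDS)); // 60000
      }
    }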

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/federation/store/StateStoreService.java

@@ -24,6 +24,7 @@ import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 
 import javax.management.NotCompliantMBeanException;
 import javax.management.ObjectName;
@@ -38,8 +39,10 @@ import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics;
 import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver;
 import org.apache.hadoop.hdfs.server.federation.store.impl.MembershipStoreImpl;
 import org.apache.hadoop.hdfs.server.federation.store.impl.MountTableStoreImpl;
+import org.apache.hadoop.hdfs.server.federation.store.impl.RouterStoreImpl;
 import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
 import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
+import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
 import org.apache.hadoop.metrics2.MetricsException;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.service.CompositeService;
@@ -148,6 +151,7 @@ public class StateStoreService extends CompositeService {
     // Add supported record stores
     addRecordStore(MembershipStoreImpl.class);
     addRecordStore(MountTableStoreImpl.class);
+    addRecordStore(RouterStoreImpl.class);
 
     // Check the connection to the State Store periodically
     this.monitorService = new StateStoreConnectionMonitorService(this);
@@ -158,6 +162,11 @@ public class StateStoreService extends CompositeService {
         DFSConfigKeys.FEDERATION_STORE_MEMBERSHIP_EXPIRATION_MS,
         DFSConfigKeys.FEDERATION_STORE_MEMBERSHIP_EXPIRATION_MS_DEFAULT));
 
+    RouterState.setExpirationMs(conf.getTimeDuration(
+        DFSConfigKeys.FEDERATION_STORE_ROUTER_EXPIRATION_MS,
+        DFSConfigKeys.FEDERATION_STORE_ROUTER_EXPIRATION_MS_DEFAULT,
+        TimeUnit.MILLISECONDS));
+
     // Cache update service
     this.cacheUpdater = new StateStoreCacheUpdateService(this);
     addService(this.cacheUpdater);

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java

@@ -51,4 +51,7 @@ public class EncryptionFaultInjector {
 
   @VisibleForTesting
   public void reencryptUpdaterProcessCheckpoint() throws IOException {}
+
+  @VisibleForTesting
+  public void ensureKeyIsInitialized() throws IOException {}
 }

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java

@@ -121,6 +121,7 @@ final class FSDirEncryptionZoneOp {
       throw new IOException("Must specify a key name when creating an "
           + "encryption zone");
     }
+    EncryptionFaultInjector.getInstance().ensureKeyIsInitialized();
     KeyProvider.Metadata metadata = provider.getMetadata(keyName);
     if (metadata == null) {
       /*

+ 57 - 28
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java

@@ -28,6 +28,8 @@ import java.net.URISyntaxException;
 import java.net.UnknownHostException;
 import java.security.Principal;
 import java.security.PrivilegedExceptionAction;
+import java.util.Base64;
+import java.util.Base64.Encoder;
 import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.List;
@@ -50,11 +52,14 @@ import javax.ws.rs.core.Context;
 import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.StreamingOutput;
+import javax.ws.rs.core.Response.ResponseBuilder;
+import javax.ws.rs.core.Response.Status;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileEncryptionInfo;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsServerDefaults;
@@ -73,6 +78,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@@ -117,9 +123,9 @@ public class NamenodeWebHdfsMethods {
   private Principal userPrincipal;
   private String remoteAddr;
 
-  private static volatile String serverDefaultsResponse = null;
   private @Context ServletContext context;
   private @Context HttpServletResponse response;
+  private boolean supportEZ;
 
   public NamenodeWebHdfsMethods(@Context HttpServletRequest request) {
     // the request object is a proxy to thread-locals so we have to extract
@@ -130,6 +136,8 @@ public class NamenodeWebHdfsMethods {
     // get the remote address, if coming in via a trusted proxy server then
     // the address with be that of the proxied client
     remoteAddr = JspHelper.getRemoteAddr(request);
+    supportEZ =
+        Boolean.valueOf(request.getHeader(WebHdfsFileSystem.EZ_HEADER));
   }
 
   private void init(final UserGroupInformation ugi,
@@ -228,7 +236,7 @@ public class NamenodeWebHdfsMethods {
   static DatanodeInfo chooseDatanode(final NameNode namenode,
       final String path, final HttpOpParam.Op op, final long openOffset,
       final long blocksize, final String excludeDatanodes,
-      final String remoteAddr) throws IOException {
+      final String remoteAddr, final HdfsFileStatus status) throws IOException {
     FSNamesystem fsn = namenode.getNamesystem();
     if (fsn == null) {
       throw new IOException("Namesystem has not been intialized yet.");
@@ -265,7 +273,6 @@ public class NamenodeWebHdfsMethods {
         || op == PostOpParam.Op.APPEND) {
       //choose a datanode containing a replica 
       final NamenodeProtocols np = getRPCServer(namenode);
-      final HdfsFileStatus status = np.getFileInfo(path);
       if (status == null) {
         throw new FileNotFoundException("File " + path + " not found.");
       }
@@ -285,7 +292,7 @@ public class NamenodeWebHdfsMethods {
           return bestNode(locations.get(0).getLocations(), excludes);
         }
       }
-    } 
+    }
 
     return (DatanodeDescriptor)bm.getDatanodeManager().getNetworkTopology(
         ).chooseRandom(NodeBase.ROOT, excludes);
@@ -322,15 +329,22 @@ public class NamenodeWebHdfsMethods {
     return t;
   }
 
-  private URI redirectURI(final NameNode namenode,
+  private URI redirectURI(ResponseBuilder rb, final NameNode namenode,
       final UserGroupInformation ugi, final DelegationParam delegation,
       final UserParam username, final DoAsParam doAsUser,
       final String path, final HttpOpParam.Op op, final long openOffset,
       final long blocksize, final String excludeDatanodes,
       final Param<?, ?>... parameters) throws URISyntaxException, IOException {
     final DatanodeInfo dn;
+    final NamenodeProtocols np = getRPCServer(namenode);
+    HdfsFileStatus status = null;
+    if (op == GetOpParam.Op.OPEN
+        || op == GetOpParam.Op.GETFILECHECKSUM
+        || op == PostOpParam.Op.APPEND) {
+      status = np.getFileInfo(path);
+    }
     dn = chooseDatanode(namenode, path, op, openOffset, blocksize,
-        excludeDatanodes, remoteAddr);
+        excludeDatanodes, remoteAddr, status);
     if (dn == null) {
       throw new IOException("Failed to find datanode, suggest to check cluster"
           + " health. excludeDatanodes=" + excludeDatanodes);
@@ -349,15 +363,27 @@ public class NamenodeWebHdfsMethods {
           namenode, ugi, null);
       delegationQuery = "&" + new DelegationParam(t.encodeToUrlString());
     }
-    final String query = op.toQueryString() + delegationQuery
-        + "&" + new NamenodeAddressParam(namenode)
-        + Param.toSortedString("&", parameters);
-    final String uripath = WebHdfsFileSystem.PATH_PREFIX + path;
+
+    StringBuilder queryBuilder = new StringBuilder();
+    queryBuilder.append(op.toQueryString());
+    queryBuilder.append(delegationQuery);
+    queryBuilder.append("&").append(new NamenodeAddressParam(namenode));
+    queryBuilder.append(Param.toSortedString("&", parameters));
+
+    boolean prependReservedRawPath = false;
+    if (op == GetOpParam.Op.OPEN && supportEZ
+        && status.getFileEncryptionInfo() != null) {
+      prependReservedRawPath = true;
+      rb.header(WebHdfsFileSystem.FEFINFO_HEADER,
+          encodeFeInfo(status.getFileEncryptionInfo()));
+    }
+    final String uripath = WebHdfsFileSystem.PATH_PREFIX +
+        (prependReservedRawPath ? "/.reserved/raw" + path : path);
 
     int port = "http".equals(scheme) ? dn.getInfoPort() : dn
         .getInfoSecurePort();
     final URI uri = new URI(scheme, null, dn.getHostName(), port, uripath,
-        query, null);
+        queryBuilder.toString(), null);
 
     if (LOG.isTraceEnabled()) {
       LOG.trace("redirectURI=" + uri);
@@ -581,7 +607,7 @@ public class NamenodeWebHdfsMethods {
     switch(op.getValue()) {
     case CREATE:
     {
-      final URI uri = redirectURI(namenode, ugi, delegation, username,
+      final URI uri = redirectURI(null, namenode, ugi, delegation, username,
           doAsUser, fullpath, op.getValue(), -1L, blockSize.getValue(conf),
           exclDatanodes.getValue(), permission, unmaskedPermission,
           overwrite, bufferSize, replication, blockSize, createParent,
@@ -830,7 +856,7 @@ public class NamenodeWebHdfsMethods {
     case APPEND:
     {
       final NameNode namenode = (NameNode)context.getAttribute("name.node");
-      final URI uri = redirectURI(namenode, ugi, delegation, username,
+      final URI uri = redirectURI(null, namenode, ugi, delegation, username,
           doAsUser, fullpath, op.getValue(), -1L, -1L,
           excludeDatanodes.getValue(), bufferSize);
       if(!noredirectParam.getValue()) {
@@ -967,6 +993,13 @@ public class NamenodeWebHdfsMethods {
     });
   }
 
+  private static String encodeFeInfo(FileEncryptionInfo feInfo) {
+    Encoder encoder = Base64.getEncoder();
+    String encodedValue = encoder
+        .encodeToString(PBHelperClient.convert(feInfo).toByteArray());
+    return encodedValue;
+  }
+
   private Response get(
       final UserGroupInformation ugi,
       final DelegationParam delegation,
@@ -995,15 +1028,17 @@ public class NamenodeWebHdfsMethods {
     case OPEN:
     {
       final NameNode namenode = (NameNode)context.getAttribute("name.node");
-      final URI uri = redirectURI(namenode, ugi, delegation, username,
+      ResponseBuilder rb = Response.noContent();
+      final URI uri = redirectURI(rb, namenode, ugi, delegation, username,
           doAsUser, fullpath, op.getValue(), offset.getValue(), -1L,
           excludeDatanodes.getValue(), offset, length, bufferSize);
       if(!noredirectParam.getValue()) {
-        return Response.temporaryRedirect(uri)
-          .type(MediaType.APPLICATION_OCTET_STREAM).build();
+        return rb.status(Status.TEMPORARY_REDIRECT).location(uri)
+            .type(MediaType.APPLICATION_OCTET_STREAM).build();
       } else {
         final String js = JsonUtil.toJsonString("Location", uri);
-        return Response.ok(js).type(MediaType.APPLICATION_JSON).build();
+        return rb.status(Status.OK).entity(js).type(MediaType.APPLICATION_JSON)
+            .build();
       }
     }
     case GET_BLOCK_LOCATIONS:
@@ -1039,8 +1074,8 @@ public class NamenodeWebHdfsMethods {
     case GETFILECHECKSUM:
     {
       final NameNode namenode = (NameNode)context.getAttribute("name.node");
-      final URI uri = redirectURI(namenode, ugi, delegation, username, doAsUser,
-          fullpath, op.getValue(), -1L, -1L, null);
+      final URI uri = redirectURI(null, namenode, ugi, delegation, username,
+          doAsUser, fullpath, op.getValue(), -1L, -1L, null);
       if(!noredirectParam.getValue()) {
         return Response.temporaryRedirect(uri)
           .type(MediaType.APPLICATION_OCTET_STREAM).build();
@@ -1140,9 +1175,12 @@ public class NamenodeWebHdfsMethods {
     case GETSERVERDEFAULTS: {
       // Since none of the server defaults values are hot reloaded, we can
       // cache the output of serverDefaults.
+      String serverDefaultsResponse =
+          (String) context.getAttribute("serverDefaults");
       if (serverDefaultsResponse == null) {
         FsServerDefaults serverDefaults = cp.getServerDefaults();
         serverDefaultsResponse = JsonUtil.toJsonString(serverDefaults);
+        context.setAttribute("serverDefaults", serverDefaultsResponse);
       }
       return Response.ok(serverDefaultsResponse)
           .type(MediaType.APPLICATION_JSON).build();
@@ -1152,15 +1190,6 @@ public class NamenodeWebHdfsMethods {
     }
   }
 
-  /*
-   * This is used only and only for testing.
-   * Please don't use it otherwise.
-   */
-  @VisibleForTesting
-  public static void resetServerDefaultsResponse() {
-    serverDefaultsResponse = null;
-  }
-
   private static String getTrashRoot(String fullPath,
       Configuration conf) throws IOException {
     FileSystem fs = FileSystem.get(conf != null ? conf : new Configuration());
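
The new FEFINFO header carries the file's encryption metadata to EZ-aware WebHDFS clients as Base64 text, encoded by encodeFeInfo() above. A small round-trip sketch using only java.util.Base64; the client-side step that turns the decoded bytes back into a FileEncryptionInfo through the protobuf helpers is summarized in a comment rather than spelled out:

    import java.nio.charset.StandardCharsets;
    import java.util.Arrays;
    import java.util.Base64;

    public class FeInfoHeaderRoundTrip {
      public static void main(String[] args) {
        // Stand-in for PBHelperClient.convert(feInfo).toByteArray() on the NameNode.
        byte[] serialized = "example-serialized-proto".getBytes(StandardCharsets.UTF_8);

        // NameNode side: what encodeFeInfo() does before setting the header.
        String header = Base64.getEncoder().encodeToString(serialized);

        // Client side: decode the header value; the real client would then
        // rebuild a FileEncryptionInfo from these bytes via the protobuf helpers.
        byte[] decoded = Base64.getDecoder().decode(header);
        System.out.println(Arrays.equals(serialized, decoded)); // true
      }
    }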

+ 54 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -5080,9 +5080,12 @@
 
   <property>
     <name>dfs.federation.router.cache.ttl</name>
-    <value>60000</value>
+    <value>1m</value>
     <description>
-      How often to refresh the State Store caches in milliseconds.
+      How often to refresh the State Store caches. This setting supports
+      multiple time unit suffixes as described in dfs.heartbeat.interval.
+      If no suffix is specified then milliseconds is assumed.
     </description>
   </property>
 
@@ -5110,6 +5113,55 @@
     </description>
   </property>
 
+  <property>
+    <name>dfs.federation.router.heartbeat-state.interval</name>
+    <value>5s</value>
+    <description>
+      How often the Router should heartbeat its state into the State Store.
+      This setting supports multiple time unit suffixes as described in
+      dfs.federation.router.quota-cache.update.interval. If no suffix is
+      specified then milliseconds is assumed.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.federation.router.store.router.expiration</name>
+    <value>5m</value>
+    <description>
+      Expiration time for a Router state record. This setting supports
+      multiple time unit suffixes as described in
+      dfs.federation.router.quota-cache.update.interval. If no suffix is
+      specified then milliseconds is assumed.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.federation.router.safemode.enable</name>
+    <value>true</value>
+    <description>
+      Enables the safe mode service in the Router. When enabled, the Router
+      starts in safe mode and rejects client requests until the State Store
+      cache has been loaded and is up to date.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.federation.router.safemode.extension</name>
+    <value>30s</value>
+    <description>
+      Time after startup that the Router is in safe mode. This setting
+      supports multiple time unit suffixes as described in
+      dfs.heartbeat.interval. If no suffix is specified then milliseconds is
+      assumed.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.federation.router.safemode.expiration</name>
+    <value>3m</value>
+    <description>
+      How long the Router can go without being able to reach the State Store
+      before it enters safe mode. This setting supports multiple time unit
+      suffixes as described in dfs.heartbeat.interval. If no suffix is
+      specified then milliseconds is assumed.
+    </description>
+  </property>
+
   <property>
     <name>dfs.federation.router.monitor.namenode</name>
     <value></value>

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md

@@ -49,7 +49,7 @@ Architecture
 
 In a typical HA cluster, two or more separate machines are configured as NameNodes. At any point in time, exactly one of the NameNodes is in an *Active* state, and the others are in a *Standby* state. The Active NameNode is responsible for all client operations in the cluster, while the Standbys are simply acting as workers, maintaining enough state to provide a fast failover if necessary.
 
-In order for the Standby node to keep its state synchronized with the Active node, both nodes communicate with a group of separate daemons called "JournalNodes" (JNs). When any namespace modification is performed by the Active node, it durably logs a record of the modification to a majority of these JNs. The Standby node is capable of reading the edits from the JNs, and is constantly watching them for changes to the edit log. As the Standby Node sees the edits, it applies them to its own namespace. In the event of a failover, the Standby will ensure that it has read all of the edits from the JounalNodes before promoting itself to the Active state. This ensures that the namespace state is fully synchronized before a failover occurs.
+In order for the Standby node to keep its state synchronized with the Active node, both nodes communicate with a group of separate daemons called "JournalNodes" (JNs). When any namespace modification is performed by the Active node, it durably logs a record of the modification to a majority of these JNs. The Standby node is capable of reading the edits from the JNs, and is constantly watching them for changes to the edit log. As the Standby Node sees the edits, it applies them to its own namespace. In the event of a failover, the Standby will ensure that it has read all of the edits from the JournalNodes before promoting itself to the Active state. This ensures that the namespace state is fully synchronized before a failover occurs.
 
 In order to provide a fast failover, it is also necessary that the Standby node have up-to-date information regarding the location of blocks in the cluster. In order to achieve this, the DataNodes are configured with the location of all NameNodes, and send block location information and heartbeats to all.
 

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSRouterFederation.md

@@ -81,6 +81,10 @@ The Routers are stateless and metadata operations are atomic at the NameNodes.
 If a Router becomes unavailable, any Router can take over for it.
 The clients configure their DFS HA client (e.g., ConfiguredFailoverProvider or RequestHedgingProxyProvider) with all the Routers in the federation as endpoints.
 
+* **Unavailable State Store:**
+If a Router cannot contact the State Store, it enters a Safe Mode state in which it does not serve requests.
+Clients treat a Router in Safe Mode as if it were a Standby NameNode and try another Router (see the client configuration sketch after this list).
+
 * **NameNode heartbeat HA:**
 For high availability and flexibility, multiple Routers can monitor the same NameNode and heartbeat the information to the State Store.
 This increases clients' resiliency to stale information, should a Router fail.
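
Because a Router in Safe Mode answers like a Standby NameNode, the ordinary HA client failover configuration is what lets clients skip it. A hedged sketch of such a client configuration; the nameservice name, router hosts and ports are made up for illustration:

    import org.apache.hadoop.conf.Configuration;

    public class RouterClientConfExample {
      public static Configuration routerClientConf() {
        Configuration conf = new Configuration();
        // Hypothetical federated nameservice backed by two Routers.
        conf.set("fs.defaultFS", "hdfs://fed");
        conf.set("dfs.nameservices", "fed");
        conf.set("dfs.ha.namenodes.fed", "r1,r2");
        conf.set("dfs.namenode.rpc-address.fed.r1", "router1.example.com:8888");
        conf.set("dfs.namenode.rpc-address.fed.r2", "router2.example.com:8888");
        // Either failover provider mentioned above works here.
        conf.set("dfs.client.failover.proxy.provider.fed",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        return conf;
      }
    }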

+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java

@@ -67,6 +67,7 @@ import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.StorageStatistics.LongStatistic;
 import org.apache.hadoop.fs.StorageType;
@@ -571,6 +572,22 @@ public class TestDistributedFileSystem {
         in.close();
         fs.close();
       }
+
+      {
+        // Test PathIsNotEmptyDirectoryException while deleting non-empty dir
+        FileSystem fs = cluster.getFileSystem();
+        fs.mkdirs(new Path("/test/nonEmptyDir"));
+        fs.create(new Path("/tmp/nonEmptyDir/emptyFile")).close();
+        try {
+          fs.delete(new Path("/tmp/nonEmptyDir"), false);
+          Assert.fail("Expecting PathIsNotEmptyDirectoryException");
+        } catch (PathIsNotEmptyDirectoryException ex) {
+          // This is the proper exception to catch; move on.
+        }
+        Assert.assertTrue(fs.exists(new Path("/test/nonEmptyDir")));
+        fs.delete(new Path("/tmp/nonEmptyDir"), true);
+      }
+
     }
     finally {
       if (cluster != null) {cluster.shutdown();}

+ 218 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java

@@ -20,10 +20,14 @@ package org.apache.hadoop.hdfs;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.PrintStream;
 import java.io.RandomAccessFile;
 import java.io.StringReader;
+import java.net.HttpURLConnection;
+import java.net.InetSocketAddress;
 import java.net.URI;
+import java.net.URL;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -41,12 +45,14 @@ import com.google.common.collect.Lists;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.CipherSuite;
+import org.apache.hadoop.crypto.CryptoInputStream;
 import org.apache.hadoop.crypto.CryptoProtocolVersion;
 import org.apache.hadoop.crypto.key.JavaKeyStoreProvider;
 import org.apache.hadoop.crypto.key.KeyProvider;
 import org.apache.hadoop.crypto.key.KeyProviderFactory;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSTestWrapper;
 import org.apache.hadoop.fs.FileContext;
@@ -80,9 +86,12 @@ import org.apache.hadoop.hdfs.web.WebHdfsConstants;
 import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
 import org.apache.hadoop.hdfs.web.WebHdfsTestUtil;
 import org.apache.hadoop.io.EnumSetWritable;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authorize.AuthorizationException;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.ToolRunner;
@@ -149,6 +158,9 @@ public class TestEncryptionZones {
   private File testRootDir;
   protected final String TEST_KEY = "test_key";
   private static final String NS_METRICS = "FSNamesystem";
+  private static final String  AUTHORIZATION_EXCEPTION_MESSAGE =
+      "User [root] is not authorized to perform [READ] on key " +
+          "with ACL name [key2]!!";
 
   protected FileSystemTestWrapper fsWrapper;
   protected FileContextTestWrapper fcWrapper;
@@ -447,7 +459,6 @@ public class TestEncryptionZones {
     dfsAdmin.createEncryptionZone(zone2, myKeyName, NO_TRASH);
     assertNumZones(++numZones);
     assertZonePresent(myKeyName, zone2.toString());
-
     /* Test failure of create encryption zones as a non super user. */
     final UserGroupInformation user = UserGroupInformation.
         createUserForTesting("user", new String[] { "mygroup" });
@@ -1057,6 +1068,31 @@ public class TestEncryptionZones {
     }
   }
 
+  private class AuthorizationExceptionInjector extends EncryptionFaultInjector {
+    @Override
+    public void ensureKeyIsInitialized() throws IOException {
+      throw new AuthorizationException(AUTHORIZATION_EXCEPTION_MESSAGE);
+    }
+  }
+
+  @Test
+  public void testExceptionInformationReturn() {
+    /* Test that exception information is propagated back to the client
+     * when creating a transparent encryption zone fails. */
+    final Path zone1 = new Path("/zone1");
+    EncryptionFaultInjector.instance = new AuthorizationExceptionInjector();
+    try {
+      dfsAdmin.createEncryptionZone(zone1, TEST_KEY, NO_TRASH);
+      fail("exception information can be returned when creating " +
+          "transparent encryption zone");
+    } catch (IOException e) {
+      assertTrue(e instanceof RemoteException);
+      assertTrue(((RemoteException) e).unwrapRemoteException()
+          instanceof AuthorizationException);
+      assertExceptionContains(AUTHORIZATION_EXCEPTION_MESSAGE, e);
+    }
+  }
+
   private class MyInjector extends EncryptionFaultInjector {
     volatile int generateCount;
     CountDownLatch ready;
@@ -1956,4 +1992,185 @@ public class TestEncryptionZones {
     Assert.assertEquals(tokens[1], testToken);
     Assert.assertEquals(1, creds.numberOfTokens());
   }
+
+  /**
+   * Creates a file with stable {@link DistributedFileSystem}.
+   * Tests the following 2 scenarios.
+   * 1. The decrypted data using {@link WebHdfsFileSystem} should be same as
+   * input data.
+   * 2. Gets the underlying raw encrypted stream and verifies that the
+   * encrypted data is different than input data.
+   * @throws Exception
+   */
+  @Test
+  public void testWebhdfsRead() throws Exception {
+    Path zonePath = new Path("/TestEncryptionZone");
+    fsWrapper.mkdir(zonePath, FsPermission.getDirDefault(), false);
+    dfsAdmin.createEncryptionZone(zonePath, TEST_KEY, NO_TRASH);
+    final Path encryptedFilePath =
+        new Path("/TestEncryptionZone/encryptedFile.txt");
+    final Path rawPath = 
+        new Path("/.reserved/raw/TestEncryptionZone/encryptedFile.txt");
+    final String content = "hello world";
+
+    // Create a file using DistributedFileSystem.
+    DFSTestUtil.writeFile(fs, encryptedFilePath, content);
+    final FileSystem webhdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
+        WebHdfsConstants.WEBHDFS_SCHEME);
+    // Verify whether decrypted input stream data is same as content.
+    InputStream decryptedInputStream = webhdfs.open(encryptedFilePath);
+    verifyStreamsSame(content, decryptedInputStream);
+
+    // Get the underlying stream from CryptoInputStream which should be
+    // raw encrypted bytes.
+    InputStream cryptoStream =
+        webhdfs.open(encryptedFilePath).getWrappedStream();
+    Assert.assertTrue("cryptoStream should be an instance of "
+        + "CryptoInputStream", (cryptoStream instanceof CryptoInputStream));
+    InputStream encryptedStream =
+        ((CryptoInputStream)cryptoStream).getWrappedStream();
+    // Verify that the data read from the raw input stream is different
+    // from the original content. Also check it is identical to the raw
+    // encrypted data from dfs.
+    verifyRaw(content, encryptedStream, fs.open(rawPath));
+  }
+
+  private void verifyStreamsSame(String content, InputStream is)
+      throws IOException {
+    byte[] streamBytes;
+    try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
+      IOUtils.copyBytes(is, os, 1024, true);
+      streamBytes = os.toByteArray();
+    }
+    Assert.assertArrayEquals(content.getBytes(), streamBytes);
+  }
+
+  private void verifyRaw(String content, InputStream is, InputStream rawIs)
+      throws IOException {
+    byte[] streamBytes, rawBytes;
+    try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
+      IOUtils.copyBytes(is, os, 1024, true);
+      streamBytes = os.toByteArray();
+    }
+    Assert.assertFalse(Arrays.equals(content.getBytes(), streamBytes));
+
+    // webhdfs raw bytes should match the raw bytes from dfs.
+    try (ByteArrayOutputStream os = new ByteArrayOutputStream()) {
+      IOUtils.copyBytes(rawIs, os, 1024, true);
+      rawBytes = os.toByteArray();
+    }
+    Assert.assertArrayEquals(rawBytes, streamBytes);
+  }
+
+  /* Tests that if the client is old and the namenode is new, then the
+   * data will be decrypted by the datanode.
+   * @throws Exception
+   */
+  @Test
+  public void testWebhdfsReadOldBehavior() throws Exception {
+    Path zonePath = new Path("/TestEncryptionZone");
+    fsWrapper.mkdir(zonePath, FsPermission.getDirDefault(), false);
+    dfsAdmin.createEncryptionZone(zonePath, TEST_KEY, NO_TRASH);
+    final Path encryptedFilePath = new Path("/TestEncryptionZone/foo");
+    final String content = "hello world";
+    // Create a file using DistributedFileSystem.
+    DFSTestUtil.writeFile(fs, encryptedFilePath, content);
+
+    InetSocketAddress addr = cluster.getNameNode().getHttpAddress();
+    URL url = new URL("http", addr.getHostString(), addr.getPort(),
+        WebHdfsFileSystem.PATH_PREFIX + encryptedFilePath.toString()
+        + "?op=OPEN");
+    // Return a connection with client not supporting EZ.
+    HttpURLConnection namenodeConnection = returnConnection(url, "GET", false);
+    String location = namenodeConnection.getHeaderField("Location");
+    URL datanodeURL = new URL(location);
+    String path = datanodeURL.getPath();
+    Assert.assertEquals(
+        WebHdfsFileSystem.PATH_PREFIX + encryptedFilePath.toString(), path);
+    HttpURLConnection datanodeConnection = returnConnection(datanodeURL,
+        "GET", false);
+    InputStream in = datanodeConnection.getInputStream();
+    // Comparing with the original contents
+    // and making sure they are decrypted.
+    verifyStreamsSame(content, in);
+  }
+
+  /* Tests that the namenode returns a path starting with /.reserved/raw if
+   * the client supports EZ, and the plain path otherwise.
+   * @throws Exception
+   */
+  @Test
+  public void testWebhfsEZRedirectLocation()
+      throws Exception {
+    Path zonePath = new Path("/TestEncryptionZone");
+    fsWrapper.mkdir(zonePath, FsPermission.getDirDefault(), false);
+    dfsAdmin.createEncryptionZone(zonePath, TEST_KEY, NO_TRASH);
+    final Path encryptedFilePath =
+        new Path("/TestEncryptionZone/foo");
+    final String content = "hello world";
+    // Create a file using DistributedFileSystem.
+    DFSTestUtil.writeFile(fs, encryptedFilePath, content);
+
+    InetSocketAddress addr = cluster.getNameNode().getHttpAddress();
+    URL url = new URL("http", addr.getHostString(), addr.getPort(),
+        WebHdfsFileSystem.PATH_PREFIX + encryptedFilePath.toString()
+        + "?op=OPEN");
+    // Return a connection with client not supporting EZ.
+    HttpURLConnection namenodeConnection =
+        returnConnection(url, "GET", false);
+    Assert.assertNotNull(namenodeConnection.getHeaderField("Location"));
+    URL datanodeUrl = new URL(namenodeConnection.getHeaderField("Location"));
+    Assert.assertNotNull(datanodeUrl);
+    String path = datanodeUrl.getPath();
+    Assert.assertEquals(
+        WebHdfsFileSystem.PATH_PREFIX + encryptedFilePath.toString(), path);
+
+    url = new URL("http", addr.getHostString(), addr.getPort(),
+        WebHdfsFileSystem.PATH_PREFIX + encryptedFilePath.toString()
+        + "?op=OPEN");
+    // Return a connection with client supporting EZ.
+    namenodeConnection = returnConnection(url, "GET", true);
+    Assert.assertNotNull(namenodeConnection.getHeaderField("Location"));
+    datanodeUrl = new URL(namenodeConnection.getHeaderField("Location"));
+    Assert.assertNotNull(datanodeUrl);
+    path = datanodeUrl.getPath();
+    Assert.assertEquals(WebHdfsFileSystem.PATH_PREFIX
+        + "/.reserved/raw" + encryptedFilePath.toString(), path);
+  }
+
+  private static HttpURLConnection returnConnection(URL url,
+      String httpRequestType, boolean supportEZ) throws Exception {
+    HttpURLConnection conn = null;
+    conn = (HttpURLConnection) url.openConnection();
+    conn.setRequestMethod(httpRequestType);
+    conn.setDoOutput(true);
+    conn.setInstanceFollowRedirects(false);
+    if (supportEZ) {
+      conn.setRequestProperty(WebHdfsFileSystem.EZ_HEADER, "true");
+    }
+    return conn;
+  }
+
+  /*
+   * Test seek behavior of the webhdfs input stream which reads data from
+   * encryption zone.
+   */
+  @Test
+  public void testPread() throws Exception {
+    Path zonePath = new Path("/TestEncryptionZone");
+    fsWrapper.mkdir(zonePath, FsPermission.getDirDefault(), false);
+    dfsAdmin.createEncryptionZone(zonePath, TEST_KEY, NO_TRASH);
+    final Path encryptedFilePath =
+        new Path("/TestEncryptionZone/foo");
+    // Create a file using WebHdfsFileSystem.
+    WebHdfsFileSystem webfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
+        WebHdfsConstants.WEBHDFS_SCHEME);
+    DFSTestUtil.createFile(webfs, encryptedFilePath, 1024, (short)1, 0xFEED);
+    byte[] data = DFSTestUtil.readFileAsBytes(fs, encryptedFilePath);
+    FSDataInputStream in = webfs.open(encryptedFilePath);
+    for (int i = 0; i < 1024; i++) {
+      in.seek(i);
+      Assert.assertEquals((data[i] & 0XFF), in.read());
+    }
+  }
 }

+ 7 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingMultipleRacks.java

@@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyRackFaultTolerant;
@@ -163,7 +164,8 @@ public class TestErasureCodingMultipleRacks {
     // Create enough extra DNs on the 2 racks to test even placement.
     // Desired placement is parityUnits replicas on the 2 racks, and 1 replica
     // on the rest of the racks (which only have 1 DN)
-    setupCluster(dataUnits + parityUnits * 4, dataUnits - parityUnits + 2,
+    int numRacks = dataUnits - parityUnits + 2;
+    setupCluster(dataUnits + parityUnits * 4, numRacks,
         dataUnits - parityUnits);
 
     final int filesize = ecPolicy.getNumDataUnits() * ecPolicy.getCellSize();
@@ -173,6 +175,10 @@ public class TestErasureCodingMultipleRacks {
       final Path path = new Path("/testfile" + i);
       LOG.info("Writing file " + path);
       DFSTestUtil.writeFile(dfs, path, contents);
+      ExtendedBlock extendedBlock = DFSTestUtil.getFirstBlock(dfs, path);
+      // Wait for replication to finish before testing
+      DFSTestUtil.waitForReplication(cluster, extendedBlock, numRacks,
+          ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits(), 0);
       BlockLocation[] blocks =
           dfs.getFileBlockLocations(path, 0, Long.MAX_VALUE);
       assertEquals(ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits(),

+ 13 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/federation/RouterConfigBuilder.java

@@ -35,6 +35,7 @@ public class RouterConfigBuilder {
   private boolean enableStateStore = false;
   private boolean enableMetrics = false;
   private boolean enableQuota = false;
+  private boolean enableSafemode = false;
 
   public RouterConfigBuilder(Configuration configuration) {
     this.conf = configuration;
@@ -52,6 +53,7 @@ public class RouterConfigBuilder {
     this.enableLocalHeartbeat = true;
     this.enableStateStore = true;
     this.enableMetrics = true;
+    this.enableSafemode = true;
     return this;
   }
 
@@ -95,6 +97,11 @@ public class RouterConfigBuilder {
     return this;
   }
 
+  public RouterConfigBuilder safemode(boolean enable) {
+    this.enableSafemode = enable;
+    return this;
+  }
+
   public RouterConfigBuilder rpc() {
     return this.rpc(true);
   }
@@ -123,6 +130,10 @@ public class RouterConfigBuilder {
     return this.quota(true);
   }
 
+  public RouterConfigBuilder safemode() {
+    return this.safemode(true);
+  }
+
   public Configuration build() {
     conf.setBoolean(DFSConfigKeys.DFS_ROUTER_STORE_ENABLE,
         this.enableStateStore);
@@ -139,6 +150,8 @@ public class RouterConfigBuilder {
         this.enableMetrics);
     conf.setBoolean(DFSConfigKeys.DFS_ROUTER_QUOTA_ENABLE,
         this.enableQuota);
+    conf.setBoolean(DFSConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE,
+        this.enableSafemode);
     return conf;
   }
 }

+ 192 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterSafemode.java

@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXPIRATION;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION;
+import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.deleteStateStore;
+import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
+import org.apache.hadoop.service.Service.STATE;
+import org.apache.hadoop.util.Time;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test the safe mode for the {@link Router} controlled by
+ * {@link RouterSafemodeService}.
+ */
+public class TestRouterSafemode {
+
+  private Router router;
+  private static Configuration conf;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    // Wipe state store
+    deleteStateStore();
+    // Configuration that supports the state store
+    conf = getStateStoreConfiguration();
+    // 2 sec startup standby
+    conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION,
+        TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS);
+    // 1 sec cache refresh
+    conf.setTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
+        TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
+    // 2 sec post cache update before entering safemode (2 intervals)
+    conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION,
+        TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS);
+    // RPC + State Store + Safe Mode only
+    conf = new RouterConfigBuilder(conf)
+        .rpc()
+        .safemode()
+        .stateStore()
+        .metrics()
+        .build();
+  }
+
+  @AfterClass
+  public static void destroy() {
+  }
+
+  @Before
+  public void setup() throws IOException, URISyntaxException {
+    router = new Router();
+    router.init(conf);
+    router.start();
+  }
+
+  @After
+  public void cleanup() throws IOException {
+    if (router != null) {
+      router.stop();
+      router = null;
+    }
+  }
+
+  @Test
+  public void testSafemodeService() throws IOException {
+    RouterSafemodeService server = new RouterSafemodeService(router);
+    server.init(conf);
+    assertEquals(STATE.INITED, server.getServiceState());
+    server.start();
+    assertEquals(STATE.STARTED, server.getServiceState());
+    server.stop();
+    assertEquals(STATE.STOPPED, server.getServiceState());
+    server.close();
+  }
+
+  @Test
+  public void testRouterExitSafemode()
+      throws InterruptedException, IllegalStateException, IOException {
+
+    assertTrue(router.getRpcServer().isInSafeMode());
+    verifyRouter(RouterServiceState.SAFEMODE);
+
+    // Wait for initial time in milliseconds
+    long interval =
+        conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION,
+            TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) +
+        conf.getTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
+            TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
+    Thread.sleep(interval);
+
+    assertFalse(router.getRpcServer().isInSafeMode());
+    verifyRouter(RouterServiceState.RUNNING);
+  }
+
+  @Test
+  public void testRouterEnterSafemode()
+      throws IllegalStateException, IOException, InterruptedException {
+
+    // Verify starting state
+    assertTrue(router.getRpcServer().isInSafeMode());
+    verifyRouter(RouterServiceState.SAFEMODE);
+
+    // We should be in safe mode for DFS_ROUTER_SAFEMODE_EXTENSION time
+    long interval0 = conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION,
+        TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) - 1000;
+    long t0 = Time.now();
+    while (Time.now() - t0 < interval0) {
+      verifyRouter(RouterServiceState.SAFEMODE);
+      Thread.sleep(100);
+    }
+
+    // We wait some time for the state to propagate
+    long interval1 = 1000 + 2 * conf.getTimeDuration(
+        DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, TimeUnit.SECONDS.toMillis(1),
+        TimeUnit.MILLISECONDS);
+    Thread.sleep(interval1);
+
+    // Running
+    assertFalse(router.getRpcServer().isInSafeMode());
+    verifyRouter(RouterServiceState.RUNNING);
+
+    // Disable cache
+    router.getStateStore().stopCacheUpdateService();
+
+    // Wait until the State Store cache is stale in milliseconds
+    long interval2 =
+        conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION,
+            TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) +
+        conf.getTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
+            TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
+    Thread.sleep(interval2);
+
+    // Safemode
+    assertTrue(router.getRpcServer().isInSafeMode());
+    verifyRouter(RouterServiceState.SAFEMODE);
+  }
+
+  @Test
+  public void testRouterRpcSafeMode()
+      throws IllegalStateException, IOException {
+
+    assertTrue(router.getRpcServer().isInSafeMode());
+    verifyRouter(RouterServiceState.SAFEMODE);
+
+    // If the Router is in safe mode, we should get a RouterSafeModeException
+    try {
+      router.getRpcServer().delete("/testfile.txt", true);
+      fail("We should have thrown a safe mode exception");
+    } catch (RouterSafeModeException sme) {
+      // Expected: the Router rejects RPC requests while in safe mode
+    }
+  }
+
+  private void verifyRouter(RouterServiceState status)
+      throws IllegalStateException, IOException {
+    assertEquals(status, router.getRouterState());
+  }
+}

+ 194 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/federation/store/TestStateStoreRouterState.java

@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.store;
+
+import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.verifyException;
+import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.clearRecords;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.server.federation.router.FederationUtil;
+import org.apache.hadoop.hdfs.server.federation.router.RouterServiceState;
+import org.apache.hadoop.hdfs.server.federation.store.protocol.GetRouterRegistrationRequest;
+import org.apache.hadoop.hdfs.server.federation.store.protocol.GetRouterRegistrationsRequest;
+import org.apache.hadoop.hdfs.server.federation.store.protocol.RouterHeartbeatRequest;
+import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
+import org.apache.hadoop.util.Time;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Test the basic {@link StateStoreService} {@link RouterStore} functionality.
+ */
+public class TestStateStoreRouterState extends TestStateStoreBase {
+
+  private static RouterStore routerStore;
+
+  @BeforeClass
+  public static void create() {
+    // Reduce expirations to 5 seconds
+    getConf().setTimeDuration(
+        DFSConfigKeys.FEDERATION_STORE_ROUTER_EXPIRATION_MS,
+        5, TimeUnit.SECONDS);
+  }
+
+  @Before
+  public void setup() throws IOException, InterruptedException {
+
+    if (routerStore == null) {
+      routerStore =
+          getStateStore().getRegisteredRecordStore(RouterStore.class);
+    }
+
+    // Clear router status registrations
+    assertTrue(clearRecords(getStateStore(), RouterState.class));
+  }
+
+  @Test
+  public void testStateStoreDisconnected() throws Exception {
+
+    // Close the data store driver
+    getStateStore().closeDriver();
+    assertEquals(false, getStateStore().isDriverReady());
+
+    // Test all APIs that access the data store to ensure they throw the correct
+    // exception.
+    GetRouterRegistrationRequest getSingleRequest =
+        GetRouterRegistrationRequest.newInstance();
+    verifyException(routerStore, "getRouterRegistration",
+        StateStoreUnavailableException.class,
+        new Class[] {GetRouterRegistrationRequest.class},
+        new Object[] {getSingleRequest});
+
+    GetRouterRegistrationsRequest getRequest =
+        GetRouterRegistrationsRequest.newInstance();
+    routerStore.loadCache(true);
+    verifyException(routerStore, "getRouterRegistrations",
+        StateStoreUnavailableException.class,
+        new Class[] {GetRouterRegistrationsRequest.class},
+        new Object[] {getRequest});
+
+    RouterHeartbeatRequest hbRequest = RouterHeartbeatRequest.newInstance(
+        RouterState.newInstance("test", 0, RouterServiceState.UNINITIALIZED));
+    verifyException(routerStore, "routerHeartbeat",
+        StateStoreUnavailableException.class,
+        new Class[] {RouterHeartbeatRequest.class},
+        new Object[] {hbRequest});
+  }
+
+  //
+  // Router
+  //
+  @Test
+  public void testUpdateRouterStatus()
+      throws IllegalStateException, IOException {
+
+    long dateStarted = Time.now();
+    String address = "testaddress";
+
+    // Set
+    RouterHeartbeatRequest request = RouterHeartbeatRequest.newInstance(
+        RouterState.newInstance(
+            address, dateStarted, RouterServiceState.RUNNING));
+    assertTrue(routerStore.routerHeartbeat(request).getStatus());
+
+    // Verify
+    GetRouterRegistrationRequest getRequest =
+        GetRouterRegistrationRequest.newInstance(address);
+    RouterState record =
+        routerStore.getRouterRegistration(getRequest).getRouter();
+    assertNotNull(record);
+    assertEquals(RouterServiceState.RUNNING, record.getStatus());
+    assertEquals(address, record.getAddress());
+    assertEquals(FederationUtil.getCompileInfo(), record.getCompileInfo());
+    // Build version may vary a bit
+    assertTrue(record.getBuildVersion().length() > 0);
+  }
+
+  @Test
+  public void testRouterStateExpired()
+      throws IOException, InterruptedException {
+
+    long dateStarted = Time.now();
+    String address = "testaddress";
+
+    RouterHeartbeatRequest request = RouterHeartbeatRequest.newInstance(
+        RouterState.newInstance(
+            address, dateStarted, RouterServiceState.RUNNING));
+    // Set
+    assertTrue(routerStore.routerHeartbeat(request).getStatus());
+
+    // Verify
+    GetRouterRegistrationRequest getRequest =
+        GetRouterRegistrationRequest.newInstance(address);
+    RouterState record =
+        routerStore.getRouterRegistration(getRequest).getRouter();
+    assertNotNull(record);
+
+    // Wait past expiration (set to 5 sec in config)
+    Thread.sleep(6000);
+
+    // Verify expired
+    RouterState r = routerStore.getRouterRegistration(getRequest).getRouter();
+    assertEquals(RouterServiceState.EXPIRED, r.getStatus());
+
+    // Heartbeat again and this shouldn't be EXPIRED anymore
+    assertTrue(routerStore.routerHeartbeat(request).getStatus());
+    r = routerStore.getRouterRegistration(getRequest).getRouter();
+    assertEquals(RouterServiceState.RUNNING, r.getStatus());
+  }
+
+  @Test
+  public void testGetAllRouterStates()
+      throws StateStoreUnavailableException, IOException {
+
+    // Set 2 entries
+    RouterHeartbeatRequest heartbeatRequest1 =
+        RouterHeartbeatRequest.newInstance(
+            RouterState.newInstance(
+                "testaddress1", Time.now(), RouterServiceState.RUNNING));
+    assertTrue(routerStore.routerHeartbeat(heartbeatRequest1).getStatus());
+
+    RouterHeartbeatRequest heartbeatRequest2 =
+        RouterHeartbeatRequest.newInstance(
+            RouterState.newInstance(
+                "testaddress2", Time.now(), RouterServiceState.RUNNING));
+    assertTrue(routerStore.routerHeartbeat(heartbeatRequest2).getStatus());
+
+    // Verify
+    routerStore.loadCache(true);
+    GetRouterRegistrationsRequest request =
+        GetRouterRegistrationsRequest.newInstance();
+    List<RouterState> entries =
+        routerStore.getRouterRegistrations(request).getRouters();
+    assertEquals(2, entries.size());
+    Collections.sort(entries);
+    assertEquals("testaddress1", entries.get(0).getAddress());
+    assertEquals("testaddress2", entries.get(1).getAddress());
+    assertEquals(RouterServiceState.RUNNING, entries.get(0).getStatus());
+    assertEquals(RouterServiceState.RUNNING, entries.get(1).getStatus());
+  }
+}
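
As a reading aid (not part of the patch), the heartbeat path exercised by this test can be sketched as one helper; it assumes a RouterStore obtained from an initialized StateStoreService, exactly as in setup():

import java.io.IOException;

import org.apache.hadoop.hdfs.server.federation.router.RouterServiceState;
import org.apache.hadoop.hdfs.server.federation.store.RouterStore;
import org.apache.hadoop.hdfs.server.federation.store.protocol.RouterHeartbeatRequest;
import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
import org.apache.hadoop.util.Time;

/** Sketch of the heartbeat flow covered by TestStateStoreRouterState. */
public final class RouterHeartbeatSketch {
  private RouterHeartbeatSketch() {
  }

  /** Returns true when the heartbeat record was committed to the State Store. */
  public static boolean heartbeat(RouterStore store, String address)
      throws IOException {
    // Build the record for this Router and push it to the State Store;
    // without further heartbeats it is reported as EXPIRED after the
    // configured expiration interval.
    RouterState state = RouterState.newInstance(
        address, Time.now(), RouterServiceState.RUNNING);
    RouterHeartbeatRequest request = RouterHeartbeatRequest.newInstance(state);
    return store.routerHeartbeat(request).getStatus();
  }
}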

+ 15 - 8
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
@@ -101,7 +102,7 @@ public class TestWebHdfsDataLocality {
           //The chosen datanode must be the same as the client address
           final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
               namenode, f, PutOpParam.Op.CREATE, -1L, blocksize, null,
-              LOCALHOST);
+              LOCALHOST, null);
           Assert.assertEquals(ipAddr, chosen.getIpAddr());
         }
       }
@@ -125,23 +126,26 @@ public class TestWebHdfsDataLocality {
       //the chosen datanode must be the same as the replica location.
 
       { //test GETFILECHECKSUM
+        final HdfsFileStatus status = dfs.getClient().getFileInfo(f);
         final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
             namenode, f, GetOpParam.Op.GETFILECHECKSUM, -1L, blocksize, null,
-            LOCALHOST);
+            LOCALHOST, status);
         Assert.assertEquals(expected, chosen);
       }
   
       { //test OPEN
+        final HdfsFileStatus status = dfs.getClient().getFileInfo(f);
         final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
             namenode, f, GetOpParam.Op.OPEN, 0, blocksize, null,
-            LOCALHOST);
+            LOCALHOST, status);
         Assert.assertEquals(expected, chosen);
       }
 
       { //test APPEND
+        final HdfsFileStatus status = dfs.getClient().getFileInfo(f);
         final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
             namenode, f, PostOpParam.Op.APPEND, -1L, blocksize, null,
-            LOCALHOST);
+            LOCALHOST, status);
         Assert.assertEquals(expected, chosen);
       }
     } finally {
@@ -195,9 +199,10 @@ public class TestWebHdfsDataLocality {
       for (int i = 0; i < 2; i++) {
         sb.append(locations[i].getXferAddr());
         { // test GETFILECHECKSUM
+          final HdfsFileStatus status = dfs.getClient().getFileInfo(f);
           final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
               namenode, f, GetOpParam.Op.GETFILECHECKSUM, -1L, blocksize,
-              sb.toString(), LOCALHOST);
+              sb.toString(), LOCALHOST, status);
           for (int j = 0; j <= i; j++) {
             Assert.assertNotEquals(locations[j].getHostName(),
                 chosen.getHostName());
@@ -205,9 +210,10 @@ public class TestWebHdfsDataLocality {
         }
 
         { // test OPEN
+          final HdfsFileStatus status = dfs.getClient().getFileInfo(f);
           final DatanodeInfo chosen = NamenodeWebHdfsMethods.chooseDatanode(
               namenode, f, GetOpParam.Op.OPEN, 0, blocksize, sb.toString(),
-              LOCALHOST);
+              LOCALHOST, status);
           for (int j = 0; j <= i; j++) {
             Assert.assertNotEquals(locations[j].getHostName(),
                 chosen.getHostName());
@@ -215,9 +221,10 @@ public class TestWebHdfsDataLocality {
         }
   
         { // test APPEND
+          final HdfsFileStatus status = dfs.getClient().getFileInfo(f);
           final DatanodeInfo chosen = NamenodeWebHdfsMethods
               .chooseDatanode(namenode, f, PostOpParam.Op.APPEND, -1L,
-                  blocksize, sb.toString(), LOCALHOST);
+                  blocksize, sb.toString(), LOCALHOST, status);
           for (int j = 0; j <= i; j++) {
             Assert.assertNotEquals(locations[j].getHostName(),
                 chosen.getHostName());
@@ -238,6 +245,6 @@ public class TestWebHdfsDataLocality {
     exception.expect(IOException.class);
     exception.expectMessage("Namesystem has not been intialized yet.");
     NamenodeWebHdfsMethods.chooseDatanode(nn, "/path", PutOpParam.Op.CREATE, 0,
-        DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT, null, LOCALHOST);
+        DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT, null, LOCALHOST, null);
   }
 }

+ 0 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java

@@ -1435,7 +1435,6 @@ public class TestWebHDFS {
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
       final WebHdfsFileSystem webfs = WebHdfsTestUtil.getWebHdfsFileSystem(
           conf, WebHdfsConstants.WEBHDFS_SCHEME);
-      NamenodeWebHdfsMethods.resetServerDefaultsResponse();
       FSNamesystem fsnSpy =
           NameNodeAdapter.spyOnNamesystem(cluster.getNameNode());
       Mockito.when(fsnSpy.getServerDefaults()).

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTokens.java

@@ -385,7 +385,7 @@ public class TestWebHdfsTokens {
     InputStream is = fs.open(p);
     is.read();
     is.close();
-    verify(fs, times(2)).getDelegationToken(); // first bad, then good
+    verify(fs, times(3)).getDelegationToken(); // first bad, then good
     verify(fs, times(1)).replaceExpiredDelegationToken();
     verify(fs, times(1)).getDelegationToken(null);
     verify(fs, times(1)).setDelegationToken(any());
@@ -402,7 +402,7 @@ public class TestWebHdfsTokens {
     is = fs.open(p);
     is.read();
     is.close();
-    verify(fs, times(2)).getDelegationToken(); // first bad, then good
+    verify(fs, times(3)).getDelegationToken(); // first bad, then good
     verify(fs, times(1)).replaceExpiredDelegationToken();
     verify(fs, times(1)).getDelegationToken(null);
     verify(fs, times(1)).setDelegationToken(any());

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/LocalContainerLauncher.java

@@ -510,7 +510,7 @@ public class LocalContainerLauncher extends AbstractService implements
           String cause =
               (tCause == null) ? throwable.getMessage() : StringUtils
                   .stringifyException(tCause);
-          umbilical.fatalError(classicAttemptID, cause);
+          umbilical.fatalError(classicAttemptID, cause, false);
         }
         throw new RuntimeException();
       }

+ 9 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
@@ -281,7 +282,7 @@ public class TaskAttemptListenerImpl extends CompositeService
   }
 
   @Override
-  public void fatalError(TaskAttemptID taskAttemptID, String msg)
+  public void fatalError(TaskAttemptID taskAttemptID, String msg, boolean fastFail)
       throws IOException {
     // This happens only in Child and in the Task.
     LOG.error("Task: " + taskAttemptID + " - exited : " + msg);
@@ -294,7 +295,7 @@ public class TaskAttemptListenerImpl extends CompositeService
     preemptionPolicy.handleFailedContainer(attemptID);
 
     context.getEventHandler().handle(
-        new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+        new TaskAttemptFailEvent(attemptID, fastFail));
   }
 
   @Override
@@ -312,7 +313,7 @@ public class TaskAttemptListenerImpl extends CompositeService
     preemptionPolicy.handleFailedContainer(attemptID);
 
     context.getEventHandler().handle(
-        new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+        new TaskAttemptFailEvent(attemptID));
   }
 
   @Override
@@ -368,14 +369,16 @@ public class TaskAttemptListenerImpl extends CompositeService
     org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID =
         TypeConverter.toYarn(taskAttemptID);
 
+    AMFeedback feedback = new AMFeedback();
     AtomicReference<TaskAttemptStatus> lastStatusRef =
         attemptIdToStatus.get(yarnAttemptID);
     if (lastStatusRef == null) {
-      throw new IllegalStateException("Status update was called"
-          + " with illegal TaskAttemptId: " + yarnAttemptID);
+      LOG.error("Status update was called with illegal TaskAttemptId: "
+          + yarnAttemptID);
+      feedback.setTaskFound(false);
+      return feedback;
     }
 
-    AMFeedback feedback = new AMFeedback();
     feedback.setTaskFound(true);
 
     // Propagating preemption to the task if TASK_PREEMPTION is enabled

+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java

@@ -206,7 +206,7 @@ class YarnChild {
       if (taskid != null) {
         if (!ShutdownHookManager.get().isShutdownInProgress()) {
           umbilical.fatalError(taskid,
-              StringUtils.stringifyException(exception));
+              StringUtils.stringifyException(exception), false);
         }
       }
     } catch (Throwable throwable) {
@@ -218,7 +218,7 @@ class YarnChild {
           String cause =
               tCause == null ? throwable.getMessage() : StringUtils
                   .stringifyException(tCause);
-          umbilical.fatalError(taskid, cause);
+          umbilical.fatalError(taskid, cause, false);
         }
       }
     } finally {

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java

@@ -1400,7 +1400,7 @@ public class MRAppMaster extends CompositeService {
 
   private void cleanUpPreviousJobOutput() {
     // recovered application masters should not remove data from previous job
-    if (!recovered()) {
+    if (!isFirstAttempt() && !recovered()) {
       JobContext jobContext = getJobContextFromConf(getConfig());
       try {
         LOG.info("Starting to clean up previous job's temporary files");

+ 53 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptFailEvent.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2.app.job.event;
+
+import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+
+public class TaskAttemptFailEvent extends TaskAttemptEvent {
+  private boolean fastFail;
+
+  /**
+   * Create a new TaskAttemptFailEvent, with task fastFail disabled.
+   *
+   * @param id the id of the task attempt
+   */
+  public TaskAttemptFailEvent(TaskAttemptId id) {
+    this(id, false);
+  }
+
+  /**
+   * Create a new TaskAttemptFailEvent.
+   *
+   * @param id the id of the task attempt
+   * @param fastFail should the task fastFail or not.
+   */
+  public TaskAttemptFailEvent(TaskAttemptId id, boolean fastFail) {
+    super(id, TaskAttemptEventType.TA_FAILMSG);
+    this.fastFail = fastFail;
+  }
+
+  /**
+   * Check if the task should fast fail instead of being retried.
+   * @return true when the task should not be retried
+   */
+  public boolean isFastFail() {
+    return fastFail;
+  }
+}
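
For orientation, a minimal sketch (not code from the patch) of how the AM side raises this event; the EventHandler comes from the application context, and fastFail carries whatever the umbilical's fatalError call reported:

import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
import org.apache.hadoop.yarn.event.EventHandler;

/** Sketch: dispatching an attempt failure, optionally disabling further retries. */
final class TaskAttemptFailDispatchSketch {
  static void reportFailure(EventHandler<TaskAttemptEvent> eventHandler,
      TaskAttemptId attemptId, boolean fastFail) {
    // fastFail == true tells TaskImpl not to schedule another attempt.
    eventHandler.handle(new TaskAttemptFailEvent(attemptId, fastFail));
  }
}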

+ 39 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskTAttemptFailedEvent.java

@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2.app.job.event;
+
+import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+
+public class TaskTAttemptFailedEvent extends TaskTAttemptEvent {
+
+  private boolean fastFail;
+
+  public TaskTAttemptFailedEvent(TaskAttemptId id) {
+    this(id, false);
+  }
+
+  public TaskTAttemptFailedEvent(TaskAttemptId id, boolean fastFail) {
+    super(id, TaskEventType.T_ATTEMPT_FAILED);
+    this.fastFail = fastFail;
+  }
+
+  public boolean isFastFail() {
+    return fastFail;
+  }
+}

+ 30 - 10
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java

@@ -94,6 +94,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunched
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
@@ -101,6 +102,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptTooManyFetchFailureEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptFailedEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
 import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
 import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
@@ -194,6 +196,7 @@ public abstract class TaskAttemptImpl implements
   private Locality locality;
   private Avataar avataar;
   private boolean rescheduleNextAttempt = false;
+  private boolean failFast = false;
 
   private static final CleanupContainerTransition
       CLEANUP_CONTAINER_TRANSITION = new CleanupContainerTransition();
@@ -1412,6 +1415,14 @@ public abstract class TaskAttemptImpl implements
   public void setAvataar(Avataar avataar) {
     this.avataar = avataar;
   }
+
+  public void setTaskFailFast(boolean failFast) {
+    this.failFast = failFast;
+  }
+
+  public boolean isTaskFailFast() {
+    return failFast;
+  }
   
   @SuppressWarnings("unchecked")
   public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo,
@@ -1921,9 +1932,12 @@ public abstract class TaskAttemptImpl implements
 
       switch(finalState) {
         case FAILED:
-          taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
-              taskAttempt.attemptId,
-              TaskEventType.T_ATTEMPT_FAILED));
+          boolean fastFail = false;
+          if (event instanceof TaskAttemptFailEvent) {
+            fastFail = ((TaskAttemptFailEvent) event).isFastFail();
+          }
+          taskAttempt.eventHandler.handle(new TaskTAttemptFailedEvent(
+              taskAttempt.attemptId, fastFail));
           break;
         case KILLED:
           taskAttempt.eventHandler.handle(new TaskTAttemptKilledEvent(
@@ -2041,13 +2055,16 @@ public abstract class TaskAttemptImpl implements
 
   private static class FailedTransition implements
       SingleArcTransition<TaskAttemptImpl, TaskAttemptEvent> {
+
+
     @SuppressWarnings("unchecked")
     @Override
     public void transition(TaskAttemptImpl taskAttempt,
         TaskAttemptEvent event) {
       // set the finish time
       taskAttempt.setFinishTime();
-      notifyTaskAttemptFailed(taskAttempt);
+
+      notifyTaskAttemptFailed(taskAttempt, taskAttempt.isTaskFailFast());
     }
   }
 
@@ -2154,8 +2171,8 @@ public abstract class TaskAttemptImpl implements
         LOG.debug("Not generating HistoryFinish event since start event not " +
             "generated for taskAttempt: " + taskAttempt.getID());
       }
-      taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
-          taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
+      taskAttempt.eventHandler.handle(new TaskTAttemptFailedEvent(
+          taskAttempt.attemptId));
     }
   }
   
@@ -2332,6 +2349,8 @@ public abstract class TaskAttemptImpl implements
       if (event instanceof TaskAttemptKillEvent) {
         taskAttempt.setRescheduleNextAttempt(
             ((TaskAttemptKillEvent)event).getRescheduleAttempt());
+      } else if (event instanceof TaskAttemptFailEvent) {
+        taskAttempt.setTaskFailFast(((TaskAttemptFailEvent)event).isFastFail());
       }
     }
   }
@@ -2400,12 +2419,13 @@ public abstract class TaskAttemptImpl implements
       // register it to finishing state
       taskAttempt.appContext.getTaskAttemptFinishingMonitor().register(
           taskAttempt.attemptId);
-      notifyTaskAttemptFailed(taskAttempt);
+      notifyTaskAttemptFailed(taskAttempt, false);
     }
   }
 
   @SuppressWarnings("unchecked")
-  private static void notifyTaskAttemptFailed(TaskAttemptImpl taskAttempt) {
+  private static void notifyTaskAttemptFailed(TaskAttemptImpl taskAttempt,
+      boolean fastFail) {
     if (taskAttempt.getLaunchTime() == 0) {
       sendJHStartEventForAssignedFailTask(taskAttempt);
     }
@@ -2419,8 +2439,8 @@ public abstract class TaskAttemptImpl implements
     taskAttempt.eventHandler.handle(new JobHistoryEvent(
         taskAttempt.attemptId.getTaskId().getJobId(), tauce));
 
-    taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
-        taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
+    taskAttempt.eventHandler.handle(new TaskTAttemptFailedEvent(
+        taskAttempt.attemptId, fastFail));
 
   }
 

+ 4 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java

@@ -74,6 +74,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptFailedEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
 import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
 import org.apache.hadoop.mapreduce.v2.app.rm.ContainerFailedEvent;
@@ -1054,7 +1055,7 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
 
     @Override
     public TaskStateInternal transition(TaskImpl task, TaskEvent event) {
-      TaskTAttemptEvent castEvent = (TaskTAttemptEvent) event;
+      TaskTAttemptFailedEvent castEvent = (TaskTAttemptFailedEvent) event;
       TaskAttemptId taskAttemptId = castEvent.getTaskAttemptID();
       task.failedAttempts.add(taskAttemptId); 
       if (taskAttemptId.equals(task.commitAttempt)) {
@@ -1068,7 +1069,8 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
       }
       
       task.finishedAttempts.add(taskAttemptId);
-      if (task.failedAttempts.size() < task.maxAttempts) {
+      if (!castEvent.isFastFail()
+          && task.failedAttempts.size() < task.maxAttempts) {
         task.handleTaskAttemptCompletion(
             taskAttemptId, 
             TaskAttemptCompletionEventStatus.FAILED);
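
The net effect of the TaskImpl change is a single retry predicate; a minimal sketch for readers following the state machine (names are illustrative, not from the patch):

/** Sketch of the retry decision in TaskImpl's attempt-failed transition. */
final class RetryDecisionSketch {
  /** A failed attempt is retried only if fast fail is off and attempts remain. */
  static boolean shouldRetry(boolean fastFail, int failedAttempts, int maxAttempts) {
    return !fastFail && failedAttempts < maxAttempts;
  }
}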

+ 4 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java

@@ -487,13 +487,15 @@ public class TestTaskAttemptListenerImpl {
     assertEquals(Phase.REDUCE, status.phase);
   }
 
-  @Test(expected = IllegalStateException.class)
+  @Test
   public void testStatusUpdateFromUnregisteredTask()
       throws IOException, InterruptedException{
     configureMocks();
     startListener(false);
 
-    listener.statusUpdate(attemptID, firstReduceStatus);
+    AMFeedback feedback = listener.statusUpdate(attemptID, firstReduceStatus);
+
+    assertFalse(feedback.getTaskFound());
   }
 
   private void configureMocks() {

+ 3 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java

@@ -23,6 +23,7 @@ import java.net.InetSocketAddress;
 import java.util.Iterator;
 import java.util.Map;
 
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
 import org.junit.Assert;
 
 import org.apache.hadoop.conf.Configuration;
@@ -288,8 +289,7 @@ public class TestFail {
       if (attemptID.getTaskId().getId() == 0) {//check if it is first task
         // send the Fail event
         getContext().getEventHandler().handle(
-            new TaskAttemptEvent(attemptID, 
-                TaskAttemptEventType.TA_FAILMSG));
+            new TaskAttemptFailEvent(attemptID));
       } else {
         getContext().getEventHandler().handle(
             new TaskAttemptEvent(attemptID,
@@ -310,8 +310,7 @@ public class TestFail {
         //check if it is first task's first attempt
         // send the Fail event
         getContext().getEventHandler().handle(
-            new TaskAttemptEvent(attemptID, 
-                TaskAttemptEventType.TA_FAILMSG));
+            new TaskAttemptFailEvent(attemptID));
       } else {
         getContext().getEventHandler().handle(
             new TaskAttemptEvent(attemptID,

+ 4 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java

@@ -38,6 +38,8 @@ import java.util.List;
 import java.util.Map;
 
 import java.util.concurrent.TimeoutException;
+
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
 import org.junit.Assert;
 
 import org.apache.hadoop.conf.Configuration;
@@ -167,9 +169,8 @@ public class TestRecovery {
     /////////// Play some games with the TaskAttempts of the first task //////
     //send the fail signal to the 1st map task attempt
     app.getContext().getEventHandler().handle(
-        new TaskAttemptEvent(
-            task1Attempt1.getID(),
-            TaskAttemptEventType.TA_FAILMSG));
+        new TaskAttemptFailEvent(
+            task1Attempt1.getID()));
     
     app.waitForState(task1Attempt1, TaskAttemptState.FAILED);
 

+ 2 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java

@@ -81,7 +81,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
-import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptFailedEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl.InitTransition;
 import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
 import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
@@ -437,8 +437,7 @@ public class TestJobImpl {
       TaskImpl task = (TaskImpl) t;
       task.handle(new TaskEvent(task.getID(), TaskEventType.T_SCHEDULE));
       for(TaskAttempt ta: task.getAttempts().values()) {
-        task.handle(new TaskTAttemptEvent(ta.getID(),
-          TaskEventType.T_ATTEMPT_FAILED));
+        task.handle(new TaskTAttemptFailedEvent(ta.getID()));
       }
     }
 

+ 4 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java

@@ -39,6 +39,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CopyOnWriteArrayList;
 
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -499,7 +500,7 @@ public class TestTaskAttempt{
           new TaskAttemptDiagnosticsUpdateEvent(attemptID,
               "Test Diagnostic Event"));
       getContext().getEventHandler().handle(
-          new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+          new TaskAttemptFailEvent(attemptID));
     }
 
     protected EventHandler<JobHistoryEvent> createJobHistoryHandler(
@@ -1357,8 +1358,7 @@ public class TestTaskAttempt{
     MockEventHandler eventHandler = new MockEventHandler();
     TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler);
 
-    taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
-        TaskAttemptEventType.TA_FAILMSG));
+    taImpl.handle(new TaskAttemptFailEvent(taImpl.getID()));
 
     assertEquals("Task attempt is not in FAILED state", taImpl.getState(),
         TaskAttemptState.FAILED);
@@ -1484,8 +1484,7 @@ public class TestTaskAttempt{
     MockEventHandler eventHandler = new MockEventHandler();
     TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler);
 
-    taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
-        TaskAttemptEventType.TA_FAILMSG));
+    taImpl.handle(new TaskAttemptFailEvent(taImpl.getID()));
 
     assertEquals("Task attempt is not in RUNNING state", taImpl.getState(),
         TaskAttemptState.FAILED);

+ 22 - 20
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java

@@ -53,6 +53,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptFailedEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptKilledEvent;
 import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
 import org.apache.hadoop.security.Credentials;
@@ -345,8 +346,7 @@ public class TestTaskImpl {
   }
 
   private void failRunningTaskAttempt(TaskAttemptId attemptId) {
-    mockTask.handle(new TaskTAttemptEvent(attemptId, 
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(attemptId));
     assertTaskRunningState();
   }
   
@@ -612,11 +612,16 @@ public class TestTaskImpl {
     
     // The task should now have succeeded
     assertTaskSucceededState();
-    
+
     // Now complete the first task attempt, after the second has succeeded
-    mockTask.handle(new TaskTAttemptEvent(taskAttempts.get(0).getAttemptId(), 
-        firstAttemptFinishEvent));
-    
+    if (firstAttemptFinishEvent.equals(TaskEventType.T_ATTEMPT_FAILED)) {
+      mockTask.handle(new TaskTAttemptFailedEvent(taskAttempts
+          .get(0).getAttemptId()));
+    } else {
+      mockTask.handle(new TaskTAttemptEvent(taskAttempts.get(0).getAttemptId(),
+          firstAttemptFinishEvent));
+    }
+
     // The task should still be in the succeeded state
     assertTaskSucceededState();
     
@@ -668,8 +673,8 @@ public class TestTaskImpl {
     assertEquals(2, taskAttempts.size());
 
     // speculative attempt retroactively fails from fetch failures
-    mockTask.handle(new TaskTAttemptEvent(taskAttempts.get(1).getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(
+        taskAttempts.get(1).getAttemptId()));
 
     assertTaskScheduledState();
     assertEquals(3, taskAttempts.size());
@@ -683,8 +688,8 @@ public class TestTaskImpl {
     assertEquals(2, taskAttempts.size());
 
     // speculative attempt retroactively fails from fetch failures
-    mockTask.handle(new TaskTAttemptEvent(taskAttempts.get(1).getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(
+        taskAttempts.get(1).getAttemptId()));
 
     assertTaskScheduledState();
     assertEquals(3, taskAttempts.size());
@@ -698,8 +703,8 @@ public class TestTaskImpl {
     assertEquals(2, taskAttempts.size());
 
     // speculative attempt retroactively fails from fetch failures
-    mockTask.handle(new TaskTAttemptEvent(taskAttempts.get(1).getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(
+        taskAttempts.get(1).getAttemptId()));
 
     assertTaskScheduledState();
     assertEquals(3, taskAttempts.size());
@@ -734,8 +739,8 @@ public class TestTaskImpl {
     // have the first attempt fail, verify task failed due to no retries
     MockTaskAttemptImpl taskAttempt = taskAttempts.get(0);
     taskAttempt.setState(TaskAttemptState.FAILED);
-    mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(
+        taskAttempt.getAttemptId()));
     assertEquals(TaskState.FAILED, mockTask.getState());
 
     // verify task can no longer be killed
@@ -757,8 +762,7 @@ public class TestTaskImpl {
         TaskEventType.T_ATTEMPT_COMMIT_PENDING));
     assertEquals(TaskState.FAILED, mockTask.getState());
     taskAttempt.setState(TaskAttemptState.FAILED);
-    mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(taskAttempt.getAttemptId()));
     assertEquals(TaskState.FAILED, mockTask.getState());
     taskAttempt = taskAttempts.get(2);
     taskAttempt.setState(TaskAttemptState.SUCCEEDED);
@@ -808,8 +812,7 @@ public class TestTaskImpl {
     // max attempts is 4
     MockTaskAttemptImpl taskAttempt = taskAttempts.get(0);
     taskAttempt.setState(TaskAttemptState.FAILED);
-    mockTask.handle(new TaskTAttemptEvent(taskAttempt.getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(taskAttempt.getAttemptId()));
     assertEquals(TaskState.RUNNING, mockTask.getState());
 
     // verify a new attempt(#3) added because the speculative attempt(#2)
@@ -829,8 +832,7 @@ public class TestTaskImpl {
     // hasn't reach the max attempts which is 4
     MockTaskAttemptImpl taskAttempt1 = taskAttempts.get(1);
     taskAttempt1.setState(TaskAttemptState.FAILED);
-    mockTask.handle(new TaskTAttemptEvent(taskAttempt1.getAttemptId(),
-        TaskEventType.T_ATTEMPT_FAILED));
+    mockTask.handle(new TaskTAttemptFailedEvent(taskAttempt1.getAttemptId()));
     assertEquals(TaskState.RUNNING, mockTask.getState());
 
     // verify there's no new attempt added because of the running attempt(#3)

+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java

@@ -729,9 +729,9 @@ public class LocalJobRunner implements ClientProtocol {
       LOG.error("shuffleError: "+ message + "from task: " + taskId);
     }
     
-    public synchronized void fatalError(TaskAttemptID taskId, String msg) 
+    public synchronized void fatalError(TaskAttemptID taskId, String msg, boolean fastFail)
     throws IOException {
-      LOG.error("Fatal: "+ msg + "from task: " + taskId);
+      LOG.error("Fatal: "+ msg + " from task: " + taskId + " fast fail: " + fastFail);
     }
     
     @Override

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java

@@ -1568,7 +1568,8 @@ public class MapTask extends Task {
         if (lspillException instanceof Error) {
           final String logMsg = "Task " + getTaskID() + " failed : " +
             StringUtils.stringifyException(lspillException);
-          mapTask.reportFatalError(getTaskID(), lspillException, logMsg);
+          mapTask.reportFatalError(getTaskID(), lspillException, logMsg,
+              false);
         }
         throw new IOException("Spill failed", lspillException);
       }

+ 95 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java

@@ -40,6 +40,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
@@ -354,7 +355,7 @@ abstract public class Task implements Writable, Configurable {
    * Report a fatal error to the parent (task) tracker.
    */
   protected void reportFatalError(TaskAttemptID id, Throwable throwable, 
-                                  String logMsg) {
+                                  String logMsg, boolean fastFail) {
     LOG.error(logMsg);
     
     if (ShutdownHookManager.get().isShutdownInProgress()) {
@@ -366,7 +367,7 @@ abstract public class Task implements Writable, Configurable {
                    ? StringUtils.stringifyException(throwable)
                    : StringUtils.stringifyException(tCause);
     try {
-      umbilical.fatalError(id, cause);
+      umbilical.fatalError(id, cause, fastFail);
     } catch (IOException ioe) {
       LOG.error("Failed to contact the tasktracker", ioe);
       System.exit(-1);
@@ -652,6 +653,8 @@ abstract public class Task implements Writable, Configurable {
     private Thread pingThread = null;
     private boolean done = true;
     private Object lock = new Object();
+    private volatile String diskLimitCheckStatus = null;
+    private Thread diskLimitCheckThread = null;
 
     /**
      * flag that indicates whether progress update needs to be sent to parent.
@@ -748,6 +751,65 @@ abstract public class Task implements Writable, Configurable {
       }
     }
 
+    /**
+     * Disk limit checker; runs in a separate thread when activated.
+     */
+    public class DiskLimitCheck implements Runnable {
+      private LocalFileSystem localFS;
+      private long fsLimit;
+      private long checkInterval;
+      private String[] localDirs;
+      private boolean killOnLimitExceeded;
+
+      public DiskLimitCheck(JobConf conf) throws IOException {
+        this.localFS = FileSystem.getLocal(conf);
+        this.fsLimit = conf.getLong(MRJobConfig.JOB_SINGLE_DISK_LIMIT_BYTES,
+            MRJobConfig.DEFAULT_JOB_SINGLE_DISK_LIMIT_BYTES);
+        this.localDirs = conf.getLocalDirs();
+        this.checkInterval = conf.getLong(
+            MRJobConfig.JOB_SINGLE_DISK_LIMIT_CHECK_INTERVAL_MS,
+            MRJobConfig.DEFAULT_JOB_SINGLE_DISK_LIMIT_CHECK_INTERVAL_MS);
+        this.killOnLimitExceeded = conf.getBoolean(
+            MRJobConfig.JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED,
+            MRJobConfig.DEFAULT_JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED);
+      }
+
+      @Override
+      public void run() {
+        while (!taskDone.get()) {
+          try {
+            long localWritesSize = 0L;
+            String largestWorkDir = null;
+            for (String local : localDirs) {
+              long size = FileUtil.getDU(localFS.pathToFile(new Path(local)));
+              if (localWritesSize < size) {
+                localWritesSize = size;
+                largestWorkDir = local;
+              }
+            }
+            if (localWritesSize > fsLimit) {
+              String localStatus =
+                  "too much data in local scratch dir="
+                      + largestWorkDir
+                      + ". current size is "
+                      + localWritesSize
+                      + " the limit is " + fsLimit;
+              if (killOnLimitExceeded) {
+                LOG.error(localStatus);
+                diskLimitCheckStatus = localStatus;
+              } else {
+                LOG.warn(localStatus);
+              }
+              break;
+            }
+            Thread.sleep(checkInterval);
+          } catch (Exception e) {
+            LOG.error(e.getMessage(), e);
+          }
+        }
+      }
+    }
+
     /**
      * check the counters to see whether the task has exceeded any configured
      * limits.
@@ -773,6 +835,9 @@ abstract public class Task implements Writable, Configurable {
                   " the limit is " + limit);
         }
       }
+      if (diskLimitCheckStatus != null) {
+        throw new TaskLimitException(diskLimitCheckStatus);
+      }
     }
 
     /**
@@ -790,6 +855,9 @@ abstract public class Task implements Writable, Configurable {
       long taskProgressInterval = MRJobConfUtil.
           getTaskProgressReportInterval(conf);
 
+      boolean uberized = conf.getBoolean("mapreduce.task.uberized",
+          false);
+
       while (!taskDone.get()) {
         synchronized (lock) {
           done = false;
@@ -828,9 +896,14 @@ abstract public class Task implements Writable, Configurable {
           // if Task Tracker is not aware of our task ID (probably because it died and 
           // came back up), kill ourselves
           if (!taskFound) {
-            LOG.warn("Parent died.  Exiting "+taskId);
-            resetDoneFlag();
-            System.exit(66);
+            if (uberized) {
+              taskDone.set(true);
+              break;
+            } else {
+              LOG.warn("Parent died.  Exiting "+taskId);
+              resetDoneFlag();
+              System.exit(66);
+            }
           }
 
           // Set a flag that says we should preempt this is read by
@@ -851,7 +924,7 @@ abstract public class Task implements Writable, Configurable {
                   StringUtils.stringifyException(e);
           LOG.error(errMsg);
           try {
-            umbilical.fatalError(taskId, errMsg);
+            umbilical.fatalError(taskId, errMsg, true);
           } catch (IOException ioe) {
             LOG.error("Failed to update failure diagnosis", ioe);
           }
@@ -884,6 +957,22 @@ abstract public class Task implements Writable, Configurable {
         pingThread.setDaemon(true);
         pingThread.start();
       }
+      startDiskLimitCheckerThreadIfNeeded();
+    }
+    public void startDiskLimitCheckerThreadIfNeeded() {
+      if (diskLimitCheckThread == null && conf.getLong(
+          MRJobConfig.JOB_SINGLE_DISK_LIMIT_BYTES,
+          MRJobConfig.DEFAULT_JOB_SINGLE_DISK_LIMIT_BYTES) >= 0) {
+        try {
+          diskLimitCheckThread = new Thread(new DiskLimitCheck(conf),
+              "disk limit check thread");
+          diskLimitCheckThread.setDaemon(true);
+          diskLimitCheckThread.start();
+        } catch (IOException e) {
+          LOG.error("Issues starting disk monitor thread: "
+              + e.getMessage(), e);
+        }
+      }
     }
     public void stopCommunicationThread() throws InterruptedException {
       if (pingThread != null) {

+ 9 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java

@@ -68,9 +68,10 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
    * Version 18 Added numRequiredSlots to TaskStatus for MAPREDUCE-516
    * Version 19 Added fatalError for child to communicate fatal errors to TT
    * Version 20 Added methods to manage checkpoints
+   * Version 21 Added fastFail parameter to fatalError
    * */
 
-  public static final long versionID = 20L;
+  public static final long versionID = 21L;
   
   /**
    * Called when a child task process starts, to get its task.
@@ -140,8 +141,13 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
   /** Report that the task encounted a local filesystem error.*/
   void fsError(TaskAttemptID taskId, String message) throws IOException;
 
-  /** Report that the task encounted a fatal error.*/
-  void fatalError(TaskAttemptID taskId, String message) throws IOException;
+  /**
+   * Report that the task encountered a fatal error.
+   * @param taskId task's id
+   * @param message fail message
+   * @param fastFail flag to enable fast fail for task
+   */
+  void fatalError(TaskAttemptID taskId, String message, boolean fastFail) throws IOException;
   
   /** Called by a reduce task to get the map output locations for finished maps.
    * Returns an update centered around the map-task-completion-events. 
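
A hedged sketch of how a child task uses the new signature, mirroring Task.reportFatalError above (the helper name is illustrative):

import java.io.IOException;

import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.util.StringUtils;

/** Sketch: reporting a fatal error and asking the AM to skip further retries. */
final class FatalErrorReportSketch {
  static void report(TaskUmbilicalProtocol umbilical, TaskAttemptID id,
      Throwable cause, boolean fastFail) {
    try {
      // fastFail == true requests that the task not be rescheduled.
      umbilical.fatalError(id, StringUtils.stringifyException(cause), fastFail);
    } catch (IOException ioe) {
      // The real Task.reportFatalError logs this and exits the child JVM.
    }
  }
}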

+ 3 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java

@@ -45,6 +45,7 @@ import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.TaskAttemptID;
 import org.apache.hadoop.mapred.TaskLog;
+import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.filecache.DistributedCache;
 import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
@@ -103,8 +104,8 @@ class Application<K1 extends WritableComparable, V1 extends Writable,
     // This password is used as shared secret key between this application and
     // child pipes process
     byte[]  password = jobToken.getPassword();
-    String localPasswordFile = new File(".") + Path.SEPARATOR
-        + "jobTokenPassword";
+    String localPasswordFile = new File(conf.get(MRConfig.LOCAL_DIR))
+        + Path.SEPARATOR + "jobTokenPassword";
     writePasswordToLocalFile(localPasswordFile, password, conf);
     env.put("hadoop.pipes.shared.secret.location", localPasswordFile);
  

+ 14 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -52,6 +52,20 @@ public interface MRJobConfig {
 
   public static final String TASK_CLEANUP_NEEDED = "mapreduce.job.committer.task.cleanup.needed";
 
+  public static final String JOB_SINGLE_DISK_LIMIT_BYTES =
+          "mapreduce.job.local-fs.single-disk-limit.bytes";
+  // negative values disable the limit
+  public static final long DEFAULT_JOB_SINGLE_DISK_LIMIT_BYTES = -1;
+
+  public static final String JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED =
+      "mapreduce.job.local-fs.single-disk-limit.check.kill-limit-exceed";
+  // setting to false only logs the kill
+  public static final boolean DEFAULT_JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED = true;
+
+  public static final String JOB_SINGLE_DISK_LIMIT_CHECK_INTERVAL_MS =
+      "mapreduce.job.local-fs.single-disk-limit.check.interval-ms";
+  public static final long DEFAULT_JOB_SINGLE_DISK_LIMIT_CHECK_INTERVAL_MS = 5000;
+
   public static final String TASK_LOCAL_WRITE_LIMIT_BYTES =
           "mapreduce.task.local-fs.write-limit.bytes";
   // negative values disable the limit

+ 22 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

@@ -62,6 +62,28 @@
   set to less than .5</description>
 </property>
 
+<property>
+  <name>mapreduce.job.local-fs.single-disk-limit.bytes</name>
+  <value>-1</value>
+  <description>Enable an in-task monitor thread that watches single-disk
+    consumption by the job. When set to a number of bytes, the task fast-fails
+    once that limit is reached on any one local directory. This is a per-disk
+    configuration.</description>
+</property>
+
+<property>
+  <name>mapreduce.job.local-fs.single-disk-limit.check.interval-ms</name>
+  <value>5000</value>
+  <description>Interval, in milliseconds, at which the disk limit check runs.</description>
+</property>
+
+<property>
+  <name>mapreduce.job.local-fs.single-disk-limit.check.kill-limit-exceed</name>
+  <value>true</value>
+  <description>Whether the task should be killed when
+    mapreduce.job.local-fs.single-disk-limit.bytes is exceeded. If false, the
+    intent to kill the task is only logged in the container logs.</description>
+</property>
+
 <property>
   <name>mapreduce.job.maps</name>
   <value>2</value>
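
Equivalently, a job can set these limits programmatically; a sketch with illustrative values, using the MRJobConfig constants introduced above:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

/** Sketch: enabling the per-disk scratch limit for a job. */
final class DiskLimitConfigSketch {
  static JobConf withScratchLimit() {
    JobConf conf = new JobConf();
    // 10 GB per local dir, checked every 5 seconds, kill the task on breach.
    conf.setLong(MRJobConfig.JOB_SINGLE_DISK_LIMIT_BYTES, 10L * 1024 * 1024 * 1024);
    conf.setLong(MRJobConfig.JOB_SINGLE_DISK_LIMIT_CHECK_INTERVAL_MS, 5000L);
    conf.setBoolean(MRJobConfig.JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED, true);
    return conf;
  }
}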

+ 88 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestTaskProgressReporter.java

@@ -18,15 +18,19 @@
 
 package org.apache.hadoop.mapred;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Random;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.ipc.ProtocolSignature;
 import org.apache.hadoop.mapred.SortedRanges.Range;
+import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.util.ExitUtil;
@@ -43,6 +47,11 @@ public class TestTaskProgressReporter {
 
   private FakeUmbilical fakeUmbilical = new FakeUmbilical();
 
+  private static final String TEST_DIR =
+      System.getProperty("test.build.data",
+          System.getProperty("java.io.tmpdir")) + "/" +
+      TestTaskProgressReporter.class.getName();
+
   private static class DummyTask extends Task {
     @Override
     public void run(JobConf job, TaskUmbilicalProtocol umbilical)
@@ -53,6 +62,11 @@ public class TestTaskProgressReporter {
     public boolean isMapTask() {
       return true;
     }
+
+    @Override
+    public boolean isCommitRequired() {
+      return false;
+    }
   }
 
   private static class FakeUmbilical implements TaskUmbilicalProtocol {
@@ -118,7 +132,7 @@ public class TestTaskProgressReporter {
     }
 
     @Override
-    public void fatalError(TaskAttemptID taskId, String message)
+    public void fatalError(TaskAttemptID taskId, String message, boolean fastFail)
         throws IOException {
     }
 
@@ -163,6 +177,78 @@ public class TestTaskProgressReporter {
     }
   }
 
+  @Test(timeout=60000)
+  public void testScratchDirSize() throws Exception {
+    String tmpPath = TEST_DIR + "/testBytesWrittenLimit-tmpFile-"
+        + new Random(System.currentTimeMillis()).nextInt();
+    File data = new File(tmpPath + "/out");
+    File testDir = new File(tmpPath);
+    testDir.mkdirs();
+    testDir.deleteOnExit();
+    JobConf conf = new JobConf();
+    conf.setStrings(MRConfig.LOCAL_DIR, "file://" + tmpPath);
+    conf.setLong(MRJobConfig.JOB_SINGLE_DISK_LIMIT_BYTES, 1024L);
+    conf.setBoolean(MRJobConfig.JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED,
+        true);
+    getBaseConfAndWriteToFile(-1, data);
+    testScratchDirLimit(false, conf);
+    data.delete();
+    getBaseConfAndWriteToFile(100, data);
+    testScratchDirLimit(false, conf);
+    data.delete();
+    getBaseConfAndWriteToFile(1536, data);
+    testScratchDirLimit(true, conf);
+    conf.setBoolean(MRJobConfig.JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED,
+        false);
+    testScratchDirLimit(false, conf);
+    conf.setBoolean(MRJobConfig.JOB_SINGLE_DISK_LIMIT_KILL_LIMIT_EXCEED,
+        true);
+    conf.setLong(MRJobConfig.JOB_SINGLE_DISK_LIMIT_BYTES, -1L);
+    testScratchDirLimit(false, conf);
+    data.delete();
+    FileUtil.fullyDelete(testDir);
+  }
+
+  private void getBaseConfAndWriteToFile(int size, File data)
+      throws IOException {
+    if (size > 0) {
+      byte[] b = new byte[size];
+      for (int i = 0; i < size; i++) {
+        b[i] = 1;
+      }
+      FileUtils.writeByteArrayToFile(data, b);
+    }
+  }
+
+  public void testScratchDirLimit(boolean fastFail, JobConf conf)
+          throws Exception {
+    ExitUtil.disableSystemExit();
+    threadExited = false;
+    Thread.UncaughtExceptionHandler h = new Thread.UncaughtExceptionHandler() {
+      public void uncaughtException(Thread th, Throwable ex) {
+        if (ex instanceof ExitUtil.ExitException) {
+          threadExited = true;
+          th.interrupt();
+        }
+      }
+    };
+    Task task = new DummyTask();
+    task.setConf(conf);
+    DummyTaskReporter reporter = new DummyTaskReporter(task);
+    reporter.startDiskLimitCheckerThreadIfNeeded();
+    Thread t = new Thread(reporter);
+    t.setUncaughtExceptionHandler(h);
+    reporter.setProgressFlag();
+    t.start();
+    while (!reporter.taskLimitIsChecked) {
+      Thread.yield();
+    }
+    task.done(fakeUmbilical, reporter);
+    reporter.resetDoneFlag();
+    t.join(1000L);
+    Assert.assertEquals(fastFail, threadExited);
+  }
+
   @Test (timeout=10000)
   public void testTaskProgress() throws Exception {
     JobConf job = new JobConf();
@@ -214,7 +300,7 @@ public class TestTaskProgressReporter {
     conf.getLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL, 0);
     conf.setLong(MRJobConfig.TASK_LOCAL_WRITE_LIMIT_BYTES, limit);
     LocalFileSystem localFS = FileSystem.getLocal(conf);
-    Path tmpPath = new Path("/tmp/testBytesWrittenLimit-tmpFile-"
+    Path tmpPath = new Path(TEST_DIR + "/testBytesWrittenLimit-tmpFile-"
             + new Random(System.currentTimeMillis()).nextInt());
     FSDataOutputStream out = localFS.create(tmpPath, true);
     out.write(new byte[LOCAL_BYTES_WRITTEN]);

+ 7 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CachedHistoryStorage.java

@@ -173,9 +173,14 @@ public class CachedHistoryStorage extends AbstractService implements
     HistoryFileInfo fileInfo;
 
     fileInfo = hsManager.getFileInfo(jobId);
+
     if (fileInfo == null) {
       throw new HSFileRuntimeException("Unable to find job " + jobId);
-    } else if (fileInfo.isDeleted()) {
+    }
+
+    fileInfo.waitUntilMoved();
+
+    if (fileInfo.isDeleted()) {
       throw new HSFileRuntimeException("Cannot load deleted job " + jobId);
     } else {
       return fileInfo.loadJob();
@@ -211,6 +216,7 @@ public class CachedHistoryStorage extends AbstractService implements
       for (HistoryFileInfo mi : hsManager.getAllFileInfo()) {
         if (mi != null) {
           JobId id = mi.getJobId();
+          mi.waitUntilMoved();
           result.put(id, new PartialJob(mi.getJobIndexInfo(), id));
         }
       }

+ 24 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java

@@ -452,6 +452,8 @@ public class HistoryFileManager extends AbstractService {
       } catch (Throwable t) {
         LOG.error("Error while trying to move a job to done", t);
         this.state = HistoryInfoState.MOVE_FAILED;
+      } finally {
+        notifyAll();
       }
     }
 
@@ -485,12 +487,16 @@ public class HistoryFileManager extends AbstractService {
     }
     
     protected synchronized void delete() throws IOException {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("deleting " + historyFile + " and " + confFile);
+      try {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("deleting " + historyFile + " and " + confFile);
+        }
+        state = HistoryInfoState.DELETED;
+        doneDirFc.delete(doneDirFc.makeQualified(historyFile), false);
+        doneDirFc.delete(doneDirFc.makeQualified(confFile), false);
+      } finally {
+        notifyAll();
       }
-      state = HistoryInfoState.DELETED;
-      doneDirFc.delete(doneDirFc.makeQualified(historyFile), false);
-      doneDirFc.delete(doneDirFc.makeQualified(confFile), false);
     }
 
     public JobIndexInfo getJobIndexInfo() {
@@ -517,6 +523,17 @@ public class HistoryFileManager extends AbstractService {
           jobIndexInfo.getNumMaps();
       return (maxTasksForLoadedJob > 0) && (totalTasks > maxTasksForLoadedJob);
     }
+
+    public synchronized void waitUntilMoved() {
+      while (isMovePending() && !didMoveFail()) {
+        try {
+          wait();
+        } catch (InterruptedException e) {
+          LOG.warn("Waiting has been interrupted");
+          throw new RuntimeException(e);
+        }
+      }
+    }
   }
 
   private SerialNumberIndex serialNumberIndex = null;
@@ -956,6 +973,7 @@ public class HistoryFileManager extends AbstractService {
           if (LOG.isDebugEnabled()) {
             LOG.debug("Scheduling move to done of " +found);
           }
+
           moveToDoneExecutor.execute(new Runnable() {
             @Override
             public void run() {
@@ -1193,5 +1211,5 @@ public class HistoryFileManager extends AbstractService {
   @VisibleForTesting
   void setMaxHistoryAge(long newValue){
     maxHistoryAge=newValue;
-  } 
+  }
 }
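
A note on the synchronization introduced above: CachedHistoryStorage now calls waitUntilMoved() before reading a HistoryFileInfo, and HistoryFileManager releases waiters with notifyAll() in a finally block on both the move and delete paths, so a failed move can no longer strand a reader. A minimal standalone sketch of the same guarded-wait pattern follows; the class and state names are illustrative, not the Hadoop types.

    // Illustrative monitor mirroring the waitUntilMoved()/notifyAll() handshake.
    class FileMoveTracker {
      enum State { MOVE_PENDING, IN_DONE_DIR, MOVE_FAILED, DELETED }

      private State state = State.MOVE_PENDING;

      synchronized void moveToDone() {
        try {
          // ... copy the history and conf files to the done directory ...
          state = State.IN_DONE_DIR;
        } catch (RuntimeException t) {
          state = State.MOVE_FAILED;
          throw t;
        } finally {
          notifyAll();   // always wake waiters, even if the move failed
        }
      }

      synchronized void waitUntilMoved() {
        // Guarded wait: re-check the condition in a loop to survive spurious wakeups.
        while (state == State.MOVE_PENDING) {
          try {
            wait();
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
          }
        }
      }
    }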

+ 26 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java

@@ -445,6 +445,32 @@ public class TestJobHistory {
     verify(fileInfo, timeout(20000).times(2)).delete();
   }
 
+  @Test
+  public void testCachedStorageWaitsForFileMove() throws IOException {
+    HistoryFileManager historyManager = mock(HistoryFileManager.class);
+    jobHistory = spy(new JobHistory());
+    doReturn(historyManager).when(jobHistory).createHistoryFileManager();
+
+    Configuration conf = new Configuration();
+    jobHistory.init(conf);
+    jobHistory.start();
+
+    CachedHistoryStorage storage = spy((CachedHistoryStorage) jobHistory
+        .getHistoryStorage());
+
+    Job job  = mock(Job.class);
+    JobId jobId  = mock(JobId.class);
+    when(job.getID()).thenReturn(jobId);
+    when(job.getTotalMaps()).thenReturn(10);
+    when(job.getTotalReduces()).thenReturn(2);
+    HistoryFileInfo fileInfo = mock(HistoryFileInfo.class);
+    when(historyManager.getFileInfo(eq(jobId))).thenReturn(fileInfo);
+    when(fileInfo.loadJob()).thenReturn(job);
+
+    storage.getFullJob(jobId);
+    verify(fileInfo).waitUntilMoved();
+  }
+
   @Test
   public void testRefreshLoadedJobCacheUnSupportedOperation() {
     jobHistory = spy(new JobHistory());

+ 5 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java

@@ -36,6 +36,7 @@ import java.util.Map;
 import java.util.StringTokenizer;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptFailEvent;
 import org.junit.Assert;
 
 import org.apache.hadoop.conf.Configuration;
@@ -712,7 +713,7 @@ public class TestJobHistoryParsing {
     protected void attemptLaunched(TaskAttemptId attemptID) {
       if (attemptID.getTaskId().getId() == 0 && attemptID.getId() == 0) {
         getContext().getEventHandler().handle(
-            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+            new TaskAttemptFailEvent(attemptID));
       } else {
         getContext().getEventHandler().handle(
             new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_DONE));
@@ -732,7 +733,7 @@ public class TestJobHistoryParsing {
     protected void attemptLaunched(TaskAttemptId attemptID) {
       if (attemptID.getTaskId().getId() == 0) {
         getContext().getEventHandler().handle(
-            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+            new TaskAttemptFailEvent(attemptID));
       } else {
         getContext().getEventHandler().handle(
             new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_DONE));
@@ -760,10 +761,10 @@ public class TestJobHistoryParsing {
             new TaskEvent(attemptID.getTaskId(), TaskEventType.T_KILL));
       } else if (taskType == TaskType.MAP && taskId == 1) {
         getContext().getEventHandler().handle(
-            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+            new TaskAttemptFailEvent(attemptID));
       } else if (taskType == TaskType.REDUCE && taskId == 0) {
         getContext().getEventHandler().handle(
-            new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
+            new TaskAttemptFailEvent(attemptID));
       } else if (taskType == TaskType.REDUCE && taskId == 1) {
         getContext().getEventHandler().handle(
             new TaskEvent(attemptID.getTaskId(), TaskEventType.T_KILL));

+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java

@@ -91,8 +91,8 @@ public class TestMapProgress {
       LOG.info("Task " + taskId + " reporting shuffle error: " + message);
     }
 
-    public void fatalError(TaskAttemptID taskId, String msg) throws IOException {
-      LOG.info("Task " + taskId + " reporting fatal error: " + msg);
+    public void fatalError(TaskAttemptID taskId, String msg, boolean fastFail) throws IOException {
+      LOG.info("Task " + taskId + " reporting fatal error: " + msg + " fast fail: " + fastFail);
     }
 
     public JvmTask getTask(JvmContext context) throws IOException {

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTaskCommit.java

@@ -124,7 +124,7 @@ public class TestTaskCommit extends HadoopTestCase {
     }
 
     @Override
-    public void fatalError(TaskAttemptID taskId, String message)
+    public void fatalError(TaskAttemptID taskId, String message, boolean fastFail)
         throws IOException { }
 
     @Override

+ 11 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/pipes/TestPipeApplication.java

@@ -47,6 +47,7 @@ import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.IFile.Writer;
+import org.apache.hadoop.mapreduce.MRConfig;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.security.TokenCache;
 import org.apache.hadoop.mapred.Counters;
@@ -83,10 +84,10 @@ public class TestPipeApplication {
   public void testRunner() throws Exception {
 
     // clean old password files
-    File[] psw = cleanTokenPasswordFile();
+    JobConf conf = new JobConf();
+    File[] psw = cleanTokenPasswordFile(conf);
     try {
       RecordReader<FloatWritable, NullWritable> rReader = new ReaderPipesMapRunner();
-      JobConf conf = new JobConf();
       conf.set(Submitter.IS_JAVA_RR, "true");
       // for stdour and stderror
 
@@ -162,7 +163,7 @@ public class TestPipeApplication {
 
     TestTaskReporter reporter = new TestTaskReporter();
 
-    File[] psw = cleanTokenPasswordFile();
+    File[] psw = cleanTokenPasswordFile(conf);
     try {
 
       conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
@@ -247,7 +248,7 @@ public class TestPipeApplication {
 
     JobConf conf = new JobConf();
 
-    File[] psw = cleanTokenPasswordFile();
+    File[] psw = cleanTokenPasswordFile(conf);
 
     System.setProperty("test.build.data",
             "target/tmp/build/TEST_SUBMITTER_MAPPER/data");
@@ -388,8 +389,8 @@ public class TestPipeApplication {
   @Test
   public void testPipesReduser() throws Exception {
 
-    File[] psw = cleanTokenPasswordFile();
     JobConf conf = new JobConf();
+    File[] psw = cleanTokenPasswordFile(conf);
     try {
       Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>(
               "user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
@@ -506,14 +507,16 @@ public class TestPipeApplication {
 
   }
 
-  private File[] cleanTokenPasswordFile() throws Exception {
+  private File[] cleanTokenPasswordFile(JobConf conf) throws Exception {
     File[] result = new File[2];
-    result[0] = new File("./jobTokenPassword");
+    result[0] = new File(conf.get(MRConfig.LOCAL_DIR) + Path.SEPARATOR
+        + "jobTokenPassword");
     if (result[0].exists()) {
       FileUtil.chmod(result[0].getAbsolutePath(), "700");
       assertTrue(result[0].delete());
     }
-    result[1] = new File("./.jobTokenPassword.crc");
+    result[1] = new File(conf.get(MRConfig.LOCAL_DIR) + Path.SEPARATOR
+        + ".jobTokenPassword.crc");
     if (result[1].exists()) {
       FileUtil.chmod(result[1].getAbsolutePath(), "700");
       result[1].delete();
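
The TestPipeApplication change above stops creating the job-token password files in the process working directory and resolves them under the configured local directory instead. A rough sketch of that resolution, reusing only the classes and the MRConfig.LOCAL_DIR key that appear in the hunk (the helper class is hypothetical):

    import java.io.File;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapreduce.MRConfig;

    // Resolve the token password files under the job's local dir.
    final class TokenFileLocator {
      private TokenFileLocator() { }

      static File[] locate(JobConf conf) {
        String localDir = conf.get(MRConfig.LOCAL_DIR);
        return new File[] {
            new File(localDir + Path.SEPARATOR + "jobTokenPassword"),
            new File(localDir + Path.SEPARATOR + ".jobTokenPassword.crc")
        };
      }
    }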

+ 2 - 1
hadoop-tools/hadoop-azure-datalake/pom.xml

@@ -33,6 +33,7 @@
     <minimalJsonVersion>0.9.1</minimalJsonVersion>
     <file.encoding>UTF-8</file.encoding>
     <downloadSources>true</downloadSources>
+    <azure.data.lake.store.sdk.version>2.2.5</azure.data.lake.store.sdk.version>
   </properties>
   <build>
     <plugins>
@@ -109,7 +110,7 @@
     <dependency>
       <groupId>com.microsoft.azure</groupId>
       <artifactId>azure-data-lake-store-sdk</artifactId>
-      <version>2.2.5</version>
+      <version>${azure.data.lake.store.sdk.version}</version>
     </dependency>
     <!--  ENDS HERE-->
     <dependency>

+ 34 - 41
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java

@@ -57,7 +57,6 @@ import org.apache.hadoop.tools.rumen.TaskAttemptInfo;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.ReservationId;
@@ -444,7 +443,7 @@ public class SLSRunner extends Configured implements Tool {
 
     for (int i = 0; i < jobCount; i++) {
       runNewAM(amType, user, queue, oldAppId, jobStartTime, jobFinishTime,
-          getTaskContainers(jsonJob), null, getAMContainerResource(jsonJob));
+          getTaskContainers(jsonJob), getAMContainerResource(jsonJob));
     }
   }
 
@@ -607,7 +606,7 @@ public class SLSRunner extends Configured implements Tool {
 
     // Only supports the default job type currently
     runNewAM(SLSUtils.DEFAULT_JOB_TYPE, user, jobQueue, oldJobId,
-        jobStartTimeMS, jobFinishTimeMS, containerList, null,
+        jobStartTimeMS, jobFinishTimeMS, containerList,
         getAMContainerResource(null));
   }
 
@@ -628,10 +627,6 @@ public class SLSRunner extends Configured implements Tool {
     localConf.set("fs.defaultFS", "file:///");
     long baselineTimeMS = 0;
 
-    // reservations use wall clock time, so need to have a reference for that
-    UTCClock clock = new UTCClock();
-    long now = clock.getTime();
-
     try {
 
       // if we use the nodeFile this could have been not initialized yet.
@@ -670,13 +665,12 @@ public class SLSRunner extends Configured implements Tool {
         ArrayList<NodeId> keyAsArray = new ArrayList<NodeId>(nmMap.keySet());
         Random rand = new Random(stjp.getSeed());
 
-        Resource maxMapRes = Resource.newInstance(0, 0);
-        long maxMapDur = 0;
         // map tasks
         for (int i = 0; i < job.getNumberMaps(); i++) {
           TaskAttemptInfo tai = job.getTaskAttemptInfo(TaskType.MAP, i, 0);
-          RMNode node = nmMap
-              .get(keyAsArray.get(rand.nextInt(keyAsArray.size()))).getNode();
+          RMNode node =
+              nmMap.get(keyAsArray.get(rand.nextInt(keyAsArray.size())))
+                  .getNode();
           String hostname = "/" + node.getRackName() + "/" + node.getHostName();
           long containerLifeTime = tai.getRuntime();
           Resource containerResource =
@@ -684,55 +678,39 @@ public class SLSRunner extends Configured implements Tool {
                   (int) tai.getTaskInfo().getTaskVCores());
           containerList.add(new ContainerSimulator(containerResource,
               containerLifeTime, hostname, DEFAULT_MAPPER_PRIORITY, "map"));
-          maxMapRes = Resources.componentwiseMax(maxMapRes, containerResource);
-          maxMapDur =
-              containerLifeTime > maxMapDur ? containerLifeTime : maxMapDur;
-
         }
 
-        Resource maxRedRes = Resource.newInstance(0, 0);
-        long maxRedDur = 0;
         // reduce tasks
         for (int i = 0; i < job.getNumberReduces(); i++) {
           TaskAttemptInfo tai = job.getTaskAttemptInfo(TaskType.REDUCE, i, 0);
-          RMNode node = nmMap
-              .get(keyAsArray.get(rand.nextInt(keyAsArray.size()))).getNode();
+          RMNode node =
+              nmMap.get(keyAsArray.get(rand.nextInt(keyAsArray.size())))
+                  .getNode();
           String hostname = "/" + node.getRackName() + "/" + node.getHostName();
           long containerLifeTime = tai.getRuntime();
           Resource containerResource =
               Resource.newInstance((int) tai.getTaskInfo().getTaskMemory(),
                   (int) tai.getTaskInfo().getTaskVCores());
-          containerList.add(new ContainerSimulator(containerResource,
-              containerLifeTime, hostname, DEFAULT_REDUCER_PRIORITY, "reduce"));
-          maxRedRes = Resources.componentwiseMax(maxRedRes, containerResource);
-          maxRedDur =
-              containerLifeTime > maxRedDur ? containerLifeTime : maxRedDur;
-
+          containerList.add(
+              new ContainerSimulator(containerResource, containerLifeTime,
+                  hostname, DEFAULT_REDUCER_PRIORITY, "reduce"));
         }
 
-        // generating reservations for the jobs that require them
+        ReservationId reservationId = null;
 
-        ReservationSubmissionRequest rr = null;
         if (job.hasDeadline()) {
-          ReservationId reservationId =
+          reservationId =
               ReservationId.newInstance(this.rm.getStartTime(), AM_ID);
-
-          rr = ReservationClientUtil.createMRReservation(reservationId,
-              "reservation_" + AM_ID, maxMapRes, job.getNumberMaps(), maxMapDur,
-              maxRedRes, job.getNumberReduces(), maxRedDur,
-              now + jobStartTimeMS, now + job.getDeadline(),
-              job.getQueueName());
-
         }
 
         runNewAM(SLSUtils.DEFAULT_JOB_TYPE, user, jobQueue, oldJobId,
-            jobStartTimeMS, jobFinishTimeMS, containerList, rr,
-            getAMContainerResource(null));
+            jobStartTimeMS, jobFinishTimeMS, containerList, reservationId,
+            job.getDeadline(), getAMContainerResource(null));
+
       }
     } finally {
       stjp.close();
     }
-
   }
 
   private Resource getAMContainerResource(Map jsonJob) {
@@ -772,7 +750,17 @@ public class SLSRunner extends Configured implements Tool {
   private void runNewAM(String jobType, String user,
       String jobQueue, String oldJobId, long jobStartTimeMS,
       long jobFinishTimeMS, List<ContainerSimulator> containerList,
-      ReservationSubmissionRequest rr, Resource amContainerResource) {
+      Resource amContainerResource) {
+    runNewAM(jobType, user, jobQueue, oldJobId, jobStartTimeMS,
+        jobFinishTimeMS, containerList, null,  -1,
+        amContainerResource);
+  }
+
+  private void runNewAM(String jobType, String user,
+      String jobQueue, String oldJobId, long jobStartTimeMS,
+      long jobFinishTimeMS, List<ContainerSimulator> containerList,
+      ReservationId reservationId, long deadline,
+      Resource amContainerResource) {
 
     AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
         amClassMap.get(jobType), new Configuration());
@@ -787,10 +775,15 @@ public class SLSRunner extends Configured implements Tool {
         oldJobId = Integer.toString(AM_ID);
       }
       AM_ID++;
-
       amSim.init(heartbeatInterval, containerList, rm, this, jobStartTimeMS,
-          jobFinishTimeMS, user, jobQueue, isTracked, oldJobId, rr,
+          jobFinishTimeMS, user, jobQueue, isTracked, oldJobId,
           runner.getStartTimeMS(), amContainerResource);
+      if(reservationId != null) {
+        // if we have a ReservationId, delegate reservation creation to
+        // AMSim (reservation shape is impl specific)
+        UTCClock clock = new UTCClock();
+        amSim.initReservation(reservationId, deadline, clock.getTime());
+      }
       runner.schedule(amSim);
       maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
       numTasks += containerList.size();
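
The SLSRunner refactor above drops the eagerly built ReservationSubmissionRequest: the runner now passes only a ReservationId and a deadline, and lets each AM simulator build a reservation shaped for its own job type via initReservation(). A compact sketch of that delegation, with hypothetical names rather than the SLS classes:

    // The runner knows only "there is a reservation"; the simulator knows its shape.
    interface ReservationAware {
      void initReservation(long reservationId, long deadlineMs, long nowMs);
    }

    class Runner {
      void launch(ReservationAware sim, Long reservationId, long deadlineMs) {
        // ... common initialization shared by every simulator type ...
        if (reservationId != null) {
          sim.initReservation(reservationId, deadlineMs, System.currentTimeMillis());
        }
        // ... schedule the simulator ...
      }
    }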

+ 23 - 18
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java

@@ -85,7 +85,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
   protected final BlockingQueue<AllocateResponse> responseQueue;
   private int responseId = 0;
   // user name
-  protected String user;  
+  private String user;
   // queue name
   protected String queue;
   // am type
@@ -105,7 +105,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
   // waiting for AM container
   volatile boolean isAMContainerRunning = false;
   volatile Container amContainer;
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(AMSimulator.class);
 
   private Resource amContainerResource;
@@ -120,9 +120,8 @@ public abstract class AMSimulator extends TaskRunner.Task {
   public void init(int heartbeatInterval,
       List<ContainerSimulator> containerList, ResourceManager resourceManager,
       SLSRunner slsRunnner, long startTime, long finishTime, String simUser,
-      String simQueue, boolean tracked, String oldApp,
-      ReservationSubmissionRequest rr, long baseTimeMS,
-      Resource amContainerResource) {
+      String simQueue, boolean tracked, String oldApp, long baseTimeMS,
+      Resource amResource) {
     super.init(startTime, startTime + 1000000L * heartbeatInterval,
         heartbeatInterval);
     this.user = simUser;
@@ -134,8 +133,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
     this.baselineTimeMS = baseTimeMS;
     this.traceStartTimeMS = startTime;
     this.traceFinishTimeMS = finishTime;
-    this.reservationRequest = rr;
-    this.amContainerResource = amContainerResource;
+    this.amContainerResource = amResource;
   }
 
   /**
@@ -171,6 +169,10 @@ public abstract class AMSimulator extends TaskRunner.Task {
     isAMContainerRunning = true;
   }
 
+  protected void setReservationRequest(ReservationSubmissionRequest rr){
+    this.reservationRequest = rr;
+  }
+
   private ReservationId submitReservationWhenSpecified()
       throws IOException, InterruptedException {
     if (reservationRequest != null) {
@@ -256,7 +258,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
               simulateStartTimeMS, simulateFinishTimeMS);
     }
   }
-  
+
   protected ResourceRequest createResourceRequest(
           Resource resource, String host, int priority, int numContainers) {
     ResourceRequest request = recordFactory
@@ -269,7 +271,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
     request.setPriority(prio);
     return request;
   }
-  
+
   protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask,
       List<ContainerId> toRelease) {
     AllocateRequest allocateRequest =
@@ -279,36 +281,39 @@ public abstract class AMSimulator extends TaskRunner.Task {
     allocateRequest.setReleaseList(toRelease);
     return allocateRequest;
   }
-  
+
   protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask) {
     return createAllocateRequest(ask, new ArrayList<ContainerId>());
   }
 
   protected abstract void processResponseQueue() throws Exception;
-  
+
   protected abstract void sendContainerRequest() throws Exception;
-  
+
+  public abstract void initReservation(
+      ReservationId reservationId, long deadline, long now);
+
   protected abstract void checkStop();
-  
+
   private void submitApp(ReservationId reservationId)
           throws YarnException, InterruptedException, IOException {
     // ask for new application
     GetNewApplicationRequest newAppRequest =
         Records.newRecord(GetNewApplicationRequest.class);
-    GetNewApplicationResponse newAppResponse = 
+    GetNewApplicationResponse newAppResponse =
         rm.getClientRMService().getNewApplication(newAppRequest);
     appId = newAppResponse.getApplicationId();
-    
+
     // submit the application
     final SubmitApplicationRequest subAppRequest =
         Records.newRecord(SubmitApplicationRequest.class);
-    ApplicationSubmissionContext appSubContext = 
+    ApplicationSubmissionContext appSubContext =
         Records.newRecord(ApplicationSubmissionContext.class);
     appSubContext.setApplicationId(appId);
     appSubContext.setMaxAppAttempts(1);
     appSubContext.setQueue(queue);
     appSubContext.setPriority(Priority.newInstance(0));
-    ContainerLaunchContext conLauContext = 
+    ContainerLaunchContext conLauContext =
         Records.newRecord(ContainerLaunchContext.class);
     conLauContext.setApplicationACLs(new HashMap<>());
     conLauContext.setCommands(new ArrayList<>());
@@ -379,7 +384,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
       }
     }
   }
-  
+
   protected List<ResourceRequest> packageRequests(
           List<ContainerSimulator> csList, int priority) {
     // create requests

+ 55 - 19
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.sls.appmaster;
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ReservationId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -42,8 +44,10 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.sls.ReservationClientUtil;
 import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
 import org.apache.hadoop.yarn.sls.SLSRunner;
+import org.apache.hadoop.yarn.util.resource.Resources;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -51,51 +55,51 @@ import org.slf4j.LoggerFactory;
 @Unstable
 public class MRAMSimulator extends AMSimulator {
   /*
-  Vocabulary Used: 
+  Vocabulary Used:
   pending -> requests which are NOT yet sent to RM
   scheduled -> requests which are sent to RM but not yet assigned
   assigned -> requests which are assigned to a container
   completed -> request corresponding to which container has completed
-  
+
   Maps are scheduled as soon as their requests are received. Reduces are
   scheduled when all maps have finished (not support slow-start currently).
   */
-  
+
   private static final int PRIORITY_REDUCE = 10;
   private static final int PRIORITY_MAP = 20;
 
   // pending maps
   private LinkedList<ContainerSimulator> pendingMaps =
           new LinkedList<>();
-  
+
   // pending failed maps
   private LinkedList<ContainerSimulator> pendingFailedMaps =
           new LinkedList<ContainerSimulator>();
-  
+
   // scheduled maps
   private LinkedList<ContainerSimulator> scheduledMaps =
           new LinkedList<ContainerSimulator>();
-  
+
   // assigned maps
   private Map<ContainerId, ContainerSimulator> assignedMaps =
           new HashMap<ContainerId, ContainerSimulator>();
-  
+
   // reduces which are not yet scheduled
   private LinkedList<ContainerSimulator> pendingReduces =
           new LinkedList<ContainerSimulator>();
-  
+
   // pending failed reduces
   private LinkedList<ContainerSimulator> pendingFailedReduces =
           new LinkedList<ContainerSimulator>();
- 
+
   // scheduled reduces
   private LinkedList<ContainerSimulator> scheduledReduces =
           new LinkedList<ContainerSimulator>();
-  
+
   // assigned reduces
   private Map<ContainerId, ContainerSimulator> assignedReduces =
           new HashMap<ContainerId, ContainerSimulator>();
-  
+
   // all maps & reduces
   private LinkedList<ContainerSimulator> allMaps =
           new LinkedList<ContainerSimulator>();
@@ -117,14 +121,14 @@ public class MRAMSimulator extends AMSimulator {
   @SuppressWarnings("checkstyle:parameternumber")
   public void init(int heartbeatInterval,
       List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
-      long traceStartTime, long traceFinishTime, String user, String queue, 
-      boolean isTracked, String oldAppId, ReservationSubmissionRequest rr,
-      long baselineStartTimeMS, Resource amContainerResource) {
+      long traceStartTime, long traceFinishTime, String user, String queue,
+      boolean isTracked, String oldAppId, long baselineStartTimeMS,
+      Resource amContainerResource) {
     super.init(heartbeatInterval, containerList, rm, se,
         traceStartTime, traceFinishTime, user, queue, isTracked, oldAppId,
-        rr, baselineStartTimeMS, amContainerResource);
+        baselineStartTimeMS, amContainerResource);
     amtype = "mapreduce";
-    
+
     // get map/reduce tasks
     for (ContainerSimulator cs : containerList) {
       if (cs.getType().equals("map")) {
@@ -202,7 +206,7 @@ public class MRAMSimulator extends AMSimulator {
           }
         }
       }
-      
+
       // check finished
       if (isAMContainerRunning &&
               (mapFinished >= mapTotal) &&
@@ -234,7 +238,7 @@ public class MRAMSimulator extends AMSimulator {
       }
     }
   }
-  
+
   /**
    * restart running because of the am container killed
    */
@@ -322,7 +326,7 @@ public class MRAMSimulator extends AMSimulator {
     if (ask == null) {
       ask = new ArrayList<>();
     }
-    
+
     final AllocateRequest request = createAllocateRequest(ask);
     if (totalContainers == 0) {
       request.setProgress(1.0f);
@@ -348,6 +352,38 @@ public class MRAMSimulator extends AMSimulator {
     }
   }
 
+  @Override
+  public void initReservation(ReservationId reservationId, long deadline,
+      long now) {
+
+    Resource mapRes = getMaxResource(allMaps);
+    long mapDur = getMaxDuration(allMaps);
+    Resource redRes = getMaxResource(allReduces);
+    long redDur = getMaxDuration(allReduces);
+
+    ReservationSubmissionRequest rr = ReservationClientUtil.
+        createMRReservation(reservationId,
+            "reservation_" + reservationId.getId(), mapRes, allMaps.size(),
+            mapDur, redRes, allReduces.size(), redDur, now + traceStartTimeMS,
+            now + deadline, queue);
+
+    setReservationRequest(rr);
+  }
+
+  // Helper to compute the component-wise maximum resource used by any container
+  private Resource getMaxResource(Collection<ContainerSimulator> containers) {
+    return containers.parallelStream()
+        .map(ContainerSimulator::getResource)
+        .reduce(Resource.newInstance(0, 0), Resources::componentwiseMax);
+  }
+
+  // Helper to compute the maximum duration of any container
+  private long getMaxDuration(Collection<ContainerSimulator> containers) {
+    return containers.parallelStream()
+        .mapToLong(ContainerSimulator::getLifeTime)
+        .reduce(0L, Long::max);
+  }
+
   @Override
   protected void checkStop() {
     if (isFinished) {
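
initReservation() above derives the reservation shape by folding over all map and reduce containers with Stream.reduce and an identity value. The sketch below shows the same fold on a plain value type; MiniResource is a hypothetical stand-in for YARN's Resource and Resources.componentwiseMax.

    import java.util.Arrays;
    import java.util.Collection;

    // Hypothetical two-field resource: memory (MB) and vcores.
    final class MiniResource {
      final long memoryMb;
      final int vcores;
      MiniResource(long memoryMb, int vcores) {
        this.memoryMb = memoryMb;
        this.vcores = vcores;
      }
      static MiniResource componentwiseMax(MiniResource a, MiniResource b) {
        return new MiniResource(Math.max(a.memoryMb, b.memoryMb),
            Math.max(a.vcores, b.vcores));
      }
      @Override public String toString() { return memoryMb + "MB/" + vcores + "vc"; }
    }

    class ReservationShape {
      // Component-wise maximum over all containers; identity is the zero resource.
      static MiniResource maxResource(Collection<MiniResource> containers) {
        return containers.stream()
            .reduce(new MiniResource(0, 0), MiniResource::componentwiseMax);
      }

      // Longest container lifetime in milliseconds; identity is 0.
      static long maxDuration(Collection<Long> lifetimesMs) {
        return lifetimesMs.stream().mapToLong(Long::longValue).reduce(0L, Long::max);
      }

      public static void main(String[] args) {
        System.out.println(maxResource(Arrays.asList(
            new MiniResource(1024, 1), new MiniResource(2048, 4))));    // 2048MB/4vc
        System.out.println(maxDuration(Arrays.asList(30000L, 90000L))); // 90000
      }
    }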

+ 2 - 2
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NMSimulator.java

@@ -71,7 +71,7 @@ public class NMSimulator extends TaskRunner.Task {
   // resource manager
   private ResourceManager rm;
   // heart beat response id
-  private int RESPONSE_ID = 1;
+  private int responseId = 0;
   private final static Logger LOG = LoggerFactory.getLogger(NMSimulator.class);
   
   public void init(String nodeIdStr, Resource nodeResource,
@@ -131,7 +131,7 @@ public class NMSimulator extends TaskRunner.Task {
     ns.setContainersStatuses(generateContainerStatusList());
     ns.setNodeId(node.getNodeID());
     ns.setKeepAliveApplications(new ArrayList<ApplicationId>());
-    ns.setResponseId(RESPONSE_ID ++);
+    ns.setResponseId(responseId++);
     ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
     beatRequest.setNodeStatus(ns);
     NodeHeartbeatResponse beatResponse =

+ 2 - 9
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java

@@ -144,8 +144,8 @@ public class NodeInfo {
       return runningApplications;
     }
 
-    public void updateNodeHeartbeatResponseForCleanup(
-            NodeHeartbeatResponse response) {
+    public void setAndUpdateNodeHeartbeatResponse(
+        NodeHeartbeatResponse response) {
     }
 
     public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
@@ -178,13 +178,6 @@ public class NodeInfo {
       return RMNodeLabelsManager.EMPTY_STRING_SET;
     }
 
-    @Override
-    public void updateNodeHeartbeatResponseForUpdatedContainers(
-        NodeHeartbeatResponse response) {
-      // TODO Auto-generated method stub
-      
-    }
-
     @Override
     public List<Container> pullNewlyIncreasedContainers() {
       // TODO Auto-generated method stub

+ 3 - 9
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java

@@ -127,9 +127,9 @@ public class RMNodeWrapper implements RMNode {
   }
 
   @Override
-  public void updateNodeHeartbeatResponseForCleanup(
-          NodeHeartbeatResponse nodeHeartbeatResponse) {
-    node.updateNodeHeartbeatResponseForCleanup(nodeHeartbeatResponse);
+  public void setAndUpdateNodeHeartbeatResponse(
+      NodeHeartbeatResponse nodeHeartbeatResponse) {
+    node.setAndUpdateNodeHeartbeatResponse(nodeHeartbeatResponse);
   }
 
   @Override
@@ -167,12 +167,6 @@ public class RMNodeWrapper implements RMNode {
     return RMNodeLabelsManager.EMPTY_STRING_SET;
   }
 
-  @Override
-  public void updateNodeHeartbeatResponseForUpdatedContainers(
-      NodeHeartbeatResponse response) {
-    // TODO Auto-generated method stub
-  }
-
   @SuppressWarnings("unchecked")
   @Override
   public List<Container> pullNewlyIncreasedContainers() {

+ 1 - 1
hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSLSRunner.java

@@ -78,7 +78,7 @@ public class TestSLSRunner extends BaseSLSRunnerTest {
     exitInvariantFile = "src/test/resources/exit-invariants.txt";
   }
 
-  @Test(timeout = 60000)
+  @Test(timeout = 90000)
   @SuppressWarnings("all")
   public void testSimulatorRunning() throws Exception {
     Configuration conf = new Configuration(false);

+ 6 - 1
hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.sls.appmaster;
 
 import com.codahale.metrics.MetricRegistry;
 import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.yarn.api.records.ReservationId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@@ -89,6 +90,10 @@ public class TestAMSimulator {
         throws YarnException, IOException, InterruptedException {
     }
 
+    @Override
+    public void initReservation(ReservationId id, long deadline, long now){
+    }
+
     @Override
     protected void checkStop() {
     }
@@ -134,7 +139,7 @@ public class TestAMSimulator {
     String queue = "default";
     List<ContainerSimulator> containers = new ArrayList<>();
     app.init(1000, containers, rm, null, 0, 1000000L, "user1", queue, true,
-        appId, null, 0, SLSConfiguration.getAMContainerResource(conf));
+        appId, 0, SLSConfiguration.getAMContainerResource(conf));
     app.firstStep();
 
     verifySchedulerMetrics(appId);

+ 4 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -1357,22 +1357,20 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_MEMORY_RESOURCE_PREFIX = NM_PREFIX
       + "resource.memory.";
 
-  @Private
   public static final String NM_MEMORY_RESOURCE_ENABLED =
       NM_MEMORY_RESOURCE_PREFIX + "enabled";
-  @Private
   public static final boolean DEFAULT_NM_MEMORY_RESOURCE_ENABLED = false;
 
-  @Private
+  public static final String NM_MEMORY_RESOURCE_ENFORCED =
+      NM_MEMORY_RESOURCE_PREFIX + "enforced";
+  public static final boolean DEFAULT_NM_MEMORY_RESOURCE_ENFORCED = true;
+
   public static final String NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS =
       NM_MEMORY_RESOURCE_PREFIX + "cgroups.swappiness";
-  @Private
   public static final int DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS = 0;
 
-  @Private
   public static final String NM_MEMORY_RESOURCE_CGROUPS_SOFT_LIMIT_PERCENTAGE =
       NM_MEMORY_RESOURCE_PREFIX + "cgroups.soft-limit-percentage";
-  @Private
   public static final float
       DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SOFT_LIMIT_PERCENTAGE =
       90.0f;

+ 0 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java

@@ -158,8 +158,6 @@ public class TestYarnConfigurationFields extends TestConfigurationFieldsBase {
         .add(YarnConfiguration.NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT);
     configurationPrefixToSkipCompare
         .add(YarnConfiguration.NM_DISK_RESOURCE_ENABLED);
-    configurationPrefixToSkipCompare
-        .add(YarnConfiguration.NM_MEMORY_RESOURCE_PREFIX);
     configurationPrefixToSkipCompare
         .add(YarnConfiguration.NM_CPU_RESOURCE_ENABLED);
     configurationPrefixToSkipCompare.add(

+ 3 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java

@@ -392,14 +392,9 @@ public class ServiceScheduler extends CompositeService {
     // ZK
     globalTokens.put(ServiceApiConstants.CLUSTER_ZK_QUORUM, getConfig()
         .getTrimmed(KEY_REGISTRY_ZK_QUORUM, DEFAULT_REGISTRY_ZK_QUORUM));
-    String user = null;
-    try {
-      user = UserGroupInformation.getCurrentUser().getShortUserName();
-    } catch (IOException e) {
-      LOG.error("Failed to get user.", e);
-    }
-    globalTokens
-        .put(SERVICE_ZK_PATH, ServiceRegistryUtils.mkServiceHomePath(user, app.getName()));
+    String user = RegistryUtils.currentUser();
+    globalTokens.put(SERVICE_ZK_PATH,
+        ServiceRegistryUtils.mkServiceHomePath(user, app.getName()));
 
     globalTokens.put(ServiceApiConstants.USER, user);
     String dnsDomain = getConfig().getTrimmed(KEY_DNS_DOMAIN);

+ 11 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java

@@ -39,8 +39,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
-import static org.apache.hadoop.yarn.service.provider.docker.DockerKeys.DEFAULT_DOCKER_NETWORK;
-
 /**
  * Launcher of applications: base class
  */
@@ -60,9 +58,9 @@ public class AbstractLauncher {
   private final Map<String, ByteBuffer> serviceData = new HashMap<>();
   protected boolean yarnDockerMode = false;
   protected String dockerImage;
-  protected String dockerNetwork = DEFAULT_DOCKER_NETWORK;
+  protected String dockerNetwork;
   protected String dockerHostname;
-  protected String runPrivilegedContainer;
+  protected boolean runPrivilegedContainer = false;
   private ServiceContext context;
 
   public AbstractLauncher(ServiceContext context) {
@@ -145,10 +143,16 @@ public class AbstractLauncher {
       Map<String, String> env = containerLaunchContext.getEnvironment();
       env.put("YARN_CONTAINER_RUNTIME_TYPE", "docker");
       env.put("YARN_CONTAINER_RUNTIME_DOCKER_IMAGE", dockerImage);
-      env.put("YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK", dockerNetwork);
+      if (ServiceUtils.isSet(dockerNetwork)) {
+        env.put("YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK",
+            dockerNetwork);
+      }
       env.put("YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_HOSTNAME",
           dockerHostname);
-      env.put("YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER", runPrivilegedContainer);
+      if (runPrivilegedContainer) {
+        env.put("YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER",
+            "true");
+      }
       StringBuilder sb = new StringBuilder();
       for (Entry<String,String> mount : mountPaths.entrySet()) {
         if (sb.length() > 0) {
@@ -238,11 +242,7 @@ public class AbstractLauncher {
   }
 
   public void setRunPrivilegedContainer(boolean runPrivilegedContainer) {
-    if (runPrivilegedContainer) {
-      this.runPrivilegedContainer = Boolean.toString(true);
-    } else {
-      this.runPrivilegedContainer = Boolean.toString(false);
-    }
+    this.runPrivilegedContainer = runPrivilegedContainer;
   }
 
 }
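
The AbstractLauncher change above switches runPrivilegedContainer to a real boolean and only writes the optional Docker environment variables when they are actually set, instead of emitting defaults. A small sketch of that conditional population, using the variable names from the hunk (the builder class and the isSet stand-in are hypothetical):

    import java.util.HashMap;
    import java.util.Map;

    // Only emit optional Docker settings when they carry real values.
    final class DockerEnvBuilder {
      static Map<String, String> build(String network, boolean privileged) {
        Map<String, String> env = new HashMap<>();
        env.put("YARN_CONTAINER_RUNTIME_TYPE", "docker");
        if (network != null && !network.isEmpty()) {   // stand-in for ServiceUtils.isSet
          env.put("YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK", network);
        }
        if (privileged) {
          env.put("YARN_CONTAINER_RUNTIME_DOCKER_RUN_PRIVILEGED_CONTAINER", "true");
        }
        return env;
      }
    }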

+ 0 - 7
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerKeys.java

@@ -18,13 +18,6 @@
 package org.apache.hadoop.yarn.service.provider.docker;
 
 public interface DockerKeys {
-  String PROVIDER_DOCKER = "docker";
   String DOCKER_PREFIX = "docker.";
-  String DOCKER_IMAGE = DOCKER_PREFIX + "image";
   String DOCKER_NETWORK = DOCKER_PREFIX + "network";
-  String DOCKER_USE_PRIVILEGED = DOCKER_PREFIX + "usePrivileged";
-  String DOCKER_START_COMMAND = DOCKER_PREFIX + "startCommand";
-
-  String DEFAULT_DOCKER_NETWORK = "bridge";
-  Boolean DEFAULT_DOCKER_USE_PRIVILEGED = false;
 }

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/docker/DockerProviderService.java

@@ -37,7 +37,7 @@ public class DockerProviderService extends AbstractProviderService
     launcher.setYarnDockerMode(true);
     launcher.setDockerImage(compInstance.getCompSpec().getArtifact().getId());
     launcher.setDockerNetwork(compInstance.getCompSpec().getConfiguration()
-        .getProperty(DOCKER_NETWORK, DEFAULT_DOCKER_NETWORK));
+        .getProperty(DOCKER_NETWORK));
     String domain = compInstance.getComponent().getScheduler().getConfig()
         .get(RegistryConstants.KEY_DNS_DOMAIN);
     String hostname;

+ 11 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/ServiceApiUtil.java

@@ -61,6 +61,9 @@ public class ServiceApiUtil {
   private static final PatternValidator namePattern
       = new PatternValidator("[a-z][a-z0-9-]*");
 
+  private static final PatternValidator userNamePattern
+      = new PatternValidator("[a-z][a-z0-9-.]*");
+
   @VisibleForTesting
   public static void setJsonSerDeser(JsonSerDeser jsd) {
     jsonSerDeser = jsd;
@@ -72,11 +75,15 @@ public class ServiceApiUtil {
       IOException {
     boolean dnsEnabled = conf.getBoolean(RegistryConstants.KEY_DNS_ENABLED,
         RegistryConstants.DEFAULT_DNS_ENABLED);
-    if (dnsEnabled && RegistryUtils.currentUser().length() > RegistryConstants
-        .MAX_FQDN_LABEL_LENGTH) {
-      throw new IllegalArgumentException(RestApiErrorMessages
-          .ERROR_USER_NAME_INVALID);
+    if (dnsEnabled) {
+      if (RegistryUtils.currentUser().length()
+          > RegistryConstants.MAX_FQDN_LABEL_LENGTH) {
+        throw new IllegalArgumentException(
+            RestApiErrorMessages.ERROR_USER_NAME_INVALID);
+      }
+      userNamePattern.validate(RegistryUtils.currentUser());
     }
+
     if (StringUtils.isEmpty(service.getName())) {
       throw new IllegalArgumentException(
           RestApiErrorMessages.ERROR_APPLICATION_NAME_INVALID);

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java

@@ -468,6 +468,9 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
   @Override
   public float getCpuUsagePercent() {
     BigInteger processTotalJiffies = getTotalProcessJiffies();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Process " + pid + " jiffies:" + processTotalJiffies);
+    }
     cpuTimeTracker.updateElapsedJiffies(processTotalJiffies,
         clock.getTime());
     return cpuTimeTracker.getCpuTrackerUsagePercent();

+ 9 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 
 /**
  * Interface class to obtain process resource usage
@@ -50,6 +51,13 @@ public abstract class ResourceCalculatorProcessTree extends Configured {
   public ResourceCalculatorProcessTree(String root) {
   }
 
+  /**
+   * Initialize the object.
+   * @throws YarnException Throws an exception on error.
+   */
+  public void initialize() throws YarnException {
+  }
+
   /**
    * Update the process-tree with latest state.
    *
@@ -168,6 +176,7 @@ public abstract class ResourceCalculatorProcessTree extends Configured {
         Constructor <? extends ResourceCalculatorProcessTree> c = clazz.getConstructor(String.class);
         ResourceCalculatorProcessTree rctree = c.newInstance(pid);
         rctree.setConf(conf);
+        rctree.initialize();
         return rctree;
       } catch(Exception e) {
         throw new RuntimeException(e);
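
The hunk above adds an initialize() hook that the factory invokes right after reflective construction and setConf(), so a process-tree implementation can fail fast if its environment is unusable. A hedged sketch of that lifecycle; the factory and the LifecycleAware interface below are illustrative, not the Hadoop API.

    import java.lang.reflect.Constructor;
    import org.apache.hadoop.conf.Configuration;

    // Construct reflectively with the pid, push configuration, then initialize.
    final class ProcessTreeFactory {
      interface LifecycleAware {
        void setConf(Configuration conf);
        void initialize() throws Exception;   // may reject a bad environment
      }

      static <T extends LifecycleAware> T create(Class<T> clazz, String pid,
          Configuration conf) {
        try {
          Constructor<T> c = clazz.getConstructor(String.class);
          T tree = c.newInstance(pid);
          tree.setConf(conf);
          tree.initialize();                  // the new hook added in this patch
          return tree;
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    }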

+ 33 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -1308,6 +1308,37 @@
     <value>-1</value>
   </property>
 
+  <property>
+    <description>Whether YARN CGroups memory tracking is enabled.</description>
+    <name>yarn.nodemanager.resource.memory.enabled</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <description>Whether YARN CGroups strict memory enforcement is enabled.
+    </description>
+    <name>yarn.nodemanager.resource.memory.enforced</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <description>If the memory limit is enforced, this is the percentage of the
+      soft limit compared to the memory assigned to the container. If there is
+      memory pressure, container memory usage will be pushed back to its soft
+      limit by swapping out memory.
+    </description>
+    <name>yarn.nodemanager.resource.memory.cgroups.soft-limit-percentage</name>
+    <value>90.0</value>
+  </property>
+
+  <property>
+    <description>Container swappiness is the likelihood that a page will be
+      swapped out rather than kept in memory. The value is between 0 and 100.
+    </description>
+    <name>yarn.nodemanager.resource.memory.cgroups.swappiness</name>
+    <value>0</value>
+  </property>
+
   <property>
     <description>Whether physical memory limits will be enforced for
     containers.</description>
@@ -1622,7 +1653,8 @@
     or be allowed to consume spare resources if they need them. For example, turning the
     flag on will restrict apps to use only their share of CPU, even if the node has spare
     CPU cycles. The default value is false i.e. use available resources. Please note that
-    turning this flag on may reduce job throughput on the cluster.</description>
+    turning this flag on may reduce job throughput on the cluster. This setting does
+    not apply to other subsystems like memory.</description>
     <name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
     <value>false</value>
   </property>
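
The four properties added above are read by the NodeManager's cgroups memory handler (see the CGroupsMemoryResourceHandlerImpl hunk later in this diff). A minimal sketch of consuming them through Configuration, using only the key names and defaults shown in this file (the settings class itself is hypothetical):

    import org.apache.hadoop.conf.Configuration;

    // Key strings and defaults copied from the yarn-default.xml entries above.
    final class MemoryCGroupSettings {
      final boolean enabled;
      final boolean enforced;
      final float softLimitPercentage;
      final int swappiness;

      MemoryCGroupSettings(Configuration conf) {
        enabled = conf.getBoolean("yarn.nodemanager.resource.memory.enabled", false);
        enforced = conf.getBoolean("yarn.nodemanager.resource.memory.enforced", true);
        softLimitPercentage = conf.getFloat(
            "yarn.nodemanager.resource.memory.cgroups.soft-limit-percentage", 90.0f);
        swappiness = conf.getInt(
            "yarn.nodemanager.resource.memory.cgroups.swappiness", 0);
      }
    }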

+ 20 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryPathUtils.java

@@ -24,11 +24,13 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.PathNotFoundException;
 import org.apache.hadoop.registry.client.exceptions.InvalidPathnameException;
 import org.apache.hadoop.registry.client.impl.zk.RegistryInternalConstants;
+import org.apache.hadoop.registry.server.dns.BaseServiceRecordProcessor;
 import org.apache.zookeeper.common.PathUtils;
 
 import java.net.IDN;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
@@ -45,6 +47,9 @@ public class RegistryPathUtils {
   private static final Pattern PATH_ENTRY_VALIDATION_PATTERN =
       Pattern.compile(RegistryInternalConstants.VALID_PATH_ENTRY_PATTERN);
 
+  private static final Pattern USER_NAME =
+      Pattern.compile("/users/([a-z][a-z0-9-.]*)");
+
   /**
    * Validate ZK path with the path itself included in
    * the exception text
@@ -215,4 +220,19 @@ public class RegistryPathUtils {
   public static String encodeYarnID(String yarnId) {
     return yarnId.replace("container", "ctr").replace("_", "-");
   }
+
+  /**
+   * Return the username found in the ZK path.
+   *
+   * @param recPath the ZK recPath.
+   * @return the user name.
+   */
+  public static String getUsername(String recPath) {
+    String user = "anonymous";
+    Matcher matcher = USER_NAME.matcher(recPath);
+    if (matcher.find()) {
+      user = matcher.group(1);
+    }
+    return user;
+  }
 }
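
getUsername() above extracts the user from a registry path with the pattern "/users/([a-z][a-z0-9-.]*)" and falls back to "anonymous" when nothing matches, which is what allows "bob-dev" and "bob.dev" in the test below. A standalone sketch of the same extraction with plain java.util.regex (the wrapper class is hypothetical):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // Same extraction as RegistryPathUtils.getUsername, shown in isolation.
    final class PathUser {
      private static final Pattern USER_NAME =
          Pattern.compile("/users/([a-z][a-z0-9-.]*)");

      static String getUsername(String recPath) {
        Matcher m = USER_NAME.matcher(recPath);
        return m.find() ? m.group(1) : "anonymous";
      }

      public static void main(String[] args) {
        System.out.println(getUsername("/registry/users/bob/services/yarn-service/test1/"));    // bob
        System.out.println(getUsername("/registry/users/bob.dev/services/yarn-service/test1")); // bob.dev
        System.out.println(getUsername("/registry/services/no-user-here"));                     // anonymous
      }
    }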

+ 4 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/client/binding/RegistryUtils.java

@@ -296,7 +296,10 @@ public class RegistryUtils {
    */
   public static String currentUser() {
     String shortUserName = currentUsernameUnencoded();
-    return encodeForRegistry(shortUserName);
+    String encodedName =  encodeForRegistry(shortUserName);
+    // DNS name doesn't allow "_", replace it with "-"
+    encodedName = RegistryUtils.convertUsername(encodedName);
+    return encodedName.replace("_", "-");
   }
 
   /**

+ 2 - 20
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/java/org/apache/hadoop/registry/server/dns/BaseServiceRecordProcessor.java

@@ -36,8 +36,6 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 /**
  * Provides common service record processing logic.
@@ -51,7 +49,6 @@ public abstract class BaseServiceRecordProcessor
   private String path;
   private String domain;
 
-  private static final Pattern USER_NAME = Pattern.compile("/users/(\\w*)/?");
   private static final String YARN_SERVICE_API_PREFIX =
       "classpath:org.apache.hadoop.yarn.service.";
   private static final String HTTP_API_TYPE = "http://";
@@ -75,21 +72,6 @@ public abstract class BaseServiceRecordProcessor
     initTypeToInfoMapping(record);
   }
 
-  /**
-   * Return the username found in the ZK path.
-   *
-   * @param recPath the ZK recPath.
-   * @return the user name.
-   */
-  protected String getUsername(String recPath) {
-    String user = "anonymous";
-    Matcher matcher = USER_NAME.matcher(recPath);
-    if (matcher.find()) {
-      user = matcher.group(1);
-    }
-    return user;
-  }
-
   /**
    * Return the IPv6 mapped address for the provided IPv4 address. Utilized
    * to create corresponding AAAA records.
@@ -300,7 +282,7 @@ public abstract class BaseServiceRecordProcessor
       String service = RegistryPathUtils.lastPathEntry(
           RegistryPathUtils.parentOf(RegistryPathUtils.parentOf(getPath())));
       String description = getRecord().description.toLowerCase();
-      String user = getUsername(getPath());
+      String user = RegistryPathUtils.getUsername(getPath());
       return Name.fromString(MessageFormat.format("{0}.{1}.{2}.{3}",
           description,
           service,
@@ -352,7 +334,7 @@ public abstract class BaseServiceRecordProcessor
      * @throws TextParseException
      */
     protected Name getServiceName() throws TextParseException {
-      String user = getUsername(getPath());
+      String user = RegistryPathUtils.getUsername(getPath());
       String service =
           String.format("%s.%s.%s",
               RegistryPathUtils.lastPathEntry(getPath()),

+ 9 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/test/java/org/apache/hadoop/registry/client/binding/TestRegistryPathUtils.java

@@ -80,7 +80,15 @@ public class TestRegistryPathUtils extends Assert {
     assertCreatedPathEquals("/alice", "/alice", "/");
   }
 
-
+  @Test
+  public void testGetUserFromPath() throws Exception {
+    assertEquals("bob", RegistryPathUtils
+        .getUsername("/registry/users/bob/services/yarn-service/test1/"));
+    assertEquals("bob-dev", RegistryPathUtils
+        .getUsername("/registry/users/bob-dev/services/yarn-service/test1"));
+    assertEquals("bob.dev", RegistryPathUtils
+        .getUsername("/registry/users/bob.dev/services/yarn-service/test1"));
+  }
 
 
   @Test

+ 4 - 7
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -454,18 +454,15 @@ public class NodeManager extends CompositeService
     ((NMContext) context).setNodeStatusUpdater(nodeStatusUpdater);
     nmStore.setNodeStatusUpdater(nodeStatusUpdater);
 
-    super.serviceInit(conf);
-    // TODO add local dirs to del
-  }
-
-  @Override
-  protected void serviceStart() throws Exception {
+    // Do secure login before calling init for added services.
     try {
       doSecureLogin();
     } catch (IOException e) {
       throw new YarnRuntimeException("Failed NodeManager login", e);
     }
-    super.serviceStart();
+
+    super.serviceInit(conf);
+    // TODO add local dirs to del
   }
 
   @Override

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java

@@ -270,7 +270,7 @@ public class ContainerLaunch implements Callable<Integer> {
         // Write out the environment
         exec.writeLaunchEnv(containerScriptOutStream, environment,
             localResources, launchContext.getCommands(),
-            new Path(containerLogDirs.get(0)), user);
+            containerLogDir, user);
       }
       // /////////// End of writing out container-script
 

+ 8 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java

@@ -54,7 +54,7 @@ public interface CGroupsHandler {
       this.name = name;
     }
 
-    String getName() {
+    public String getName() {
       return name;
     }
 
@@ -112,6 +112,13 @@ public interface CGroupsHandler {
   void deleteCGroup(CGroupController controller, String cGroupId) throws
       ResourceHandlerException;
 
+  /**
+   * Gets the absolute path to the specified cgroup controller.
+   * @param controller - controller type for the cgroup
+   * @return the root of the controller.
+   */
+  String getControllerPath(CGroupController controller);
+
   /**
    * Gets the relative path for the cgroup, independent of a controller, for a
    * given cgroup id.

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java

@@ -125,7 +125,8 @@ class CGroupsHandlerImpl implements CGroupsHandler {
     initializeControllerPaths();
   }
 
-  private String getControllerPath(CGroupController controller) {
+  @Override
+  public String getControllerPath(CGroupController controller) {
     try {
       rwLock.readLock().lock();
       return controllerPaths.get(controller);

+ 29 - 23
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMemoryResourceHandlerImpl.java

@@ -52,6 +52,7 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
   private static final int OPPORTUNISTIC_SOFT_LIMIT = 0;
 
   private CGroupsHandler cGroupsHandler;
+  private boolean enforce = true;
   private int swappiness = 0;
   // multiplier to set the soft limit - value should be between 0 and 1
   private float softLimit = 0.0f;
@@ -79,6 +80,9 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
       throw new ResourceHandlerException(msg);
     }
     this.cGroupsHandler.initializeCGroupController(MEMORY);
+    enforce = conf.getBoolean(
+        YarnConfiguration.NM_MEMORY_RESOURCE_ENFORCED,
+        YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_ENFORCED);
     swappiness = conf
         .getInt(YarnConfiguration.NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS,
             YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_CGROUPS_SWAPPINESS);
@@ -124,31 +128,33 @@ public class CGroupsMemoryResourceHandlerImpl implements MemoryResourceHandler {
         (long) (container.getResource().getMemorySize() * this.softLimit);
     long containerHardLimit = container.getResource().getMemorySize();
     cGroupsHandler.createCGroup(MEMORY, cgroupId);
-    try {
-      cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
-          CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES,
-          String.valueOf(containerHardLimit) + "M");
-      ContainerTokenIdentifier id = container.getContainerTokenIdentifier();
-      if (id != null && id.getExecutionType() ==
-          ExecutionType.OPPORTUNISTIC) {
+    if (enforce) {
+      try {
         cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
-            CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
-            String.valueOf(OPPORTUNISTIC_SOFT_LIMIT) + "M");
-        cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
-            CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
-            String.valueOf(OPPORTUNISTIC_SWAPPINESS));
-      } else {
-        cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
-            CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
-            String.valueOf(containerSoftLimit) + "M");
-        cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
-            CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
-            String.valueOf(swappiness));
+            CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES,
+            String.valueOf(containerHardLimit) + "M");
+        ContainerTokenIdentifier id = container.getContainerTokenIdentifier();
+        if (id != null && id.getExecutionType() ==
+            ExecutionType.OPPORTUNISTIC) {
+          cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
+              CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
+              String.valueOf(OPPORTUNISTIC_SOFT_LIMIT) + "M");
+          cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
+              CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
+              String.valueOf(OPPORTUNISTIC_SWAPPINESS));
+        } else {
+          cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
+              CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
+              String.valueOf(containerSoftLimit) + "M");
+          cGroupsHandler.updateCGroupParam(MEMORY, cgroupId,
+              CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS,
+              String.valueOf(swappiness));
+        }
+      } catch (ResourceHandlerException re) {
+        cGroupsHandler.deleteCGroup(MEMORY, cgroupId);
+        LOG.warn("Could not update cgroup for container", re);
+        throw re;
       }
-    } catch (ResourceHandlerException re) {
-      cGroupsHandler.deleteCGroup(MEMORY, cgroupId);
-      LOG.warn("Could not update cgroup for container", re);
-      throw re;
     }
     List<PrivilegedOperation> ret = new ArrayList<>();
     ret.add(new PrivilegedOperation(

+ 357 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsResourceCalculator.java

@@ -0,0 +1,357 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.CpuTimeTracker;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.util.SysInfoLinux;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
+import org.apache.hadoop.yarn.util.SystemClock;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.math.BigInteger;
+import java.nio.charset.Charset;
+import java.util.function.Function;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A cgroups file-system based Resource calculator without the process tree
+ * features.
+ *
+ * CGroups has its limitations. It can only be enabled if both CPU and memory
+ * cgroups are enabled with yarn.nodemanager.resource.cpu.enabled and
+ * yarn.nodemanager.resource.memory.enabled respectively. This means that
+ * memory limits are enforced by default. You can turn this off and keep
+ * memory reporting only with yarn.nodemanager.resource.memory.enforced.
+ *
+ * Another limitation is virtual memory measurement. CGroups does not have the
+ * ability to measure virtual memory usage. This includes memory reserved but
+ * not used. CGroups measures used memory as the sum of
+ * physical memory and swap usage. This will be returned in the virtual
+ * memory counters.
+ * If the real virtual memory is required, please use the legacy procfs based
+ * resource calculator or CombinedResourceCalculator.
+ */
+public class CGroupsResourceCalculator extends ResourceCalculatorProcessTree {
+  enum Result {
+    Continue,
+    Exit
+  }
+  protected static final Log LOG = LogFactory
+      .getLog(CGroupsResourceCalculator.class);
+  private static final String PROCFS = "/proc";
+  static final String CGROUP = "cgroup";
+  static final String CPU_STAT = "cpuacct.stat";
+  static final String MEM_STAT = "memory.usage_in_bytes";
+  static final String MEMSW_STAT = "memory.memsw.usage_in_bytes";
+  private static final String USER = "user ";
+  private static final String SYSTEM = "system ";
+
+  private static final Pattern CGROUP_FILE_FORMAT = Pattern.compile(
+      "^(\\d+):([^:]+):/(.*)$");
+  private final String procfsDir;
+  private CGroupsHandler cGroupsHandler;
+
+  private String pid;
+  private File cpuStat;
+  private File memStat;
+  private File memswStat;
+
+  private BigInteger processTotalJiffies;
+  private long processPhysicalMemory;
+  private long processVirtualMemory;
+
+  private final long jiffyLengthMs;
+  private final CpuTimeTracker cpuTimeTracker;
+  private Clock clock;
+
+  /**
+   * Create resource calculator for all Yarn containers.
+   */
+  public CGroupsResourceCalculator()
+      throws YarnException {
+    this(null, PROCFS, ResourceHandlerModule.getCGroupsHandler(),
+        SystemClock.getInstance(), SysInfoLinux.JIFFY_LENGTH_IN_MILLIS);
+  }
+
+  /**
+   * Create resource calculator for the container that has the specified pid.
+   * @param pid A pid from the cgroup or null for all containers
+   */
+  public CGroupsResourceCalculator(String pid) {
+    this(pid, PROCFS, ResourceHandlerModule.getCGroupsHandler(),
+        SystemClock.getInstance(), SysInfoLinux.JIFFY_LENGTH_IN_MILLIS);
+  }
+
+  /**
+   * Create resource calculator for testing.
+   * @param pid A pid from the cgroup or null for all containers
+   * @param procfsDir Path to /proc or a mock /proc directory
+   * @param cGroupsHandler Initialized cgroups handler object
+   * @param clock A clock object
+   * @param jiffyLengthMs0 Jiffy length in milliseconds
+   */
+  @VisibleForTesting
+  CGroupsResourceCalculator(String pid, String procfsDir,
+                            CGroupsHandler cGroupsHandler,
+                            Clock clock,
+                            long jiffyLengthMs0) {
+    super(pid);
+    this.procfsDir = procfsDir;
+    this.cGroupsHandler = cGroupsHandler;
+    this.pid = pid != null && pid.equals("0") ? "1" : pid;
+    this.jiffyLengthMs = jiffyLengthMs0;
+    this.cpuTimeTracker =
+        new CpuTimeTracker(this.jiffyLengthMs);
+    this.clock = clock;
+    this.processTotalJiffies = BigInteger.ZERO;
+    this.processPhysicalMemory = UNAVAILABLE;
+    this.processVirtualMemory = UNAVAILABLE;
+  }
+
+  @Override
+  public void initialize() throws YarnException {
+    if (!CGroupsResourceCalculator.isAvailable()) {
+      throw new YarnException("CGroupsResourceCalculator is not available");
+    }
+    setCGroupFilePaths();
+  }
+
+  @Override
+  public float getCpuUsagePercent() {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Process " + pid + " jiffies:" + processTotalJiffies);
+    }
+    return cpuTimeTracker.getCpuTrackerUsagePercent();
+  }
+
+  @Override
+  public long getCumulativeCpuTime() {
+    if (jiffyLengthMs < 0) {
+      return UNAVAILABLE;
+    }
+    return processTotalJiffies.longValue() * jiffyLengthMs;
+  }
+
+  @Override
+  public long getRssMemorySize(int olderThanAge) {
+    if (olderThanAge > 1) {
+      return UNAVAILABLE;
+    }
+    return processPhysicalMemory;
+  }
+
+  @Override
+  public long getVirtualMemorySize(int olderThanAge) {
+    if (olderThanAge > 1) {
+      return UNAVAILABLE;
+    }
+    return processVirtualMemory;
+  }
+
+  @Override
+  public void updateProcessTree() {
+    try {
+      this.processTotalJiffies = readTotalProcessJiffies();
+      cpuTimeTracker.updateElapsedJiffies(processTotalJiffies,
+          clock.getTime());
+    } catch (YarnException e) {
+      LOG.warn("Failed to parse " + pid, e);
+    }
+    processPhysicalMemory = getMemorySize(memStat);
+    if (memswStat.exists()) {
+      processVirtualMemory = getMemorySize(memswStat);
+    } else if (LOG.isDebugEnabled()) {
+      LOG.debug("Swap cgroups monitoring is not compiled into the kernel " +
+          memswStat.getAbsolutePath());
+    }
+  }
+
+  @Override
+  public String getProcessTreeDump() {
+    // There is no process tree in cgroups; return just the pid for tracking.
+    return pid;
+  }
+
+  @Override
+  public boolean checkPidPgrpidForMatch() {
+    // There is no process tree in cgroups; report a match by default.
+    return true;
+  }
+
+  /**
+   * Checks if the CGroupsResourceCalculator is available on this system.
+   * This assumes that Linux container executor is already initialized.
+   *
+   * @return true if CGroupsResourceCalculator is available. False otherwise.
+   */
+  public static boolean isAvailable() {
+    try {
+      if (!Shell.LINUX) {
+        LOG.info("CGroupsResourceCalculator currently is supported only on "
+            + "Linux.");
+        return false;
+      }
+      if (ResourceHandlerModule.getCGroupsHandler() == null ||
+          ResourceHandlerModule.getCpuResourceHandler() == null ||
+          ResourceHandlerModule.getMemoryResourceHandler() == null) {
+        LOG.info("CGroupsResourceCalculator requires enabling CGroups" +
+            "cpu and memory");
+        return false;
+      }
+    } catch (SecurityException se) {
+      LOG.warn("Failed to get Operating System name. " + se);
+      return false;
+    }
+    return true;
+  }
+
+  private long getMemorySize(File cgroupUsageFile) {
+    long[] mem = new long[1];
+    try {
+      processFile(cgroupUsageFile, (String line) -> {
+        mem[0] = Long.parseLong(line);
+        return Result.Exit;
+      });
+      return mem[0];
+    } catch (YarnException e) {
+      LOG.warn("Failed to parse cgroups " + memswStat, e);
+    }
+    return UNAVAILABLE;
+  }
+
+  private BigInteger readTotalProcessJiffies() throws YarnException {
+    final BigInteger[] totalCPUTimeJiffies = new BigInteger[1];
+    totalCPUTimeJiffies[0] = BigInteger.ZERO;
+    processFile(cpuStat, (String line) -> {
+      if (line.startsWith(USER)) {
+        totalCPUTimeJiffies[0] = totalCPUTimeJiffies[0].add(
+            new BigInteger(line.substring(USER.length())));
+      }
+      if (line.startsWith(SYSTEM)) {
+        totalCPUTimeJiffies[0] = totalCPUTimeJiffies[0].add(
+            new BigInteger(line.substring(SYSTEM.length())));
+      }
+      return Result.Continue;
+    });
+    return totalCPUTimeJiffies[0];
+  }
+
+  private String getCGroupRelativePath(
+      CGroupsHandler.CGroupController controller)
+      throws YarnException {
+    if (pid == null) {
+      return cGroupsHandler.getRelativePathForCGroup("");
+    } else {
+      return getCGroupRelativePathForPid(controller);
+    }
+  }
+
+  private String getCGroupRelativePathForPid(
+      CGroupsHandler.CGroupController controller)
+      throws YarnException {
+    File pidCgroupFile = new File(new File(procfsDir, pid), CGROUP);
+    String[] result = new String[1];
+    processFile(pidCgroupFile, (String line) -> {
+      Matcher m = CGROUP_FILE_FORMAT.matcher(line);
+      boolean mat = m.find();
+      if (mat) {
+        if (m.group(2).contains(controller.getName())) {
+          // Instead of returning the full path we compose it
+          // based on the last item as the container id
+          // This helps to avoid confusion within a privileged Docker container
+          // where the path is referred in /proc/<pid>/cgroup as
+          // /docker/<dcontainerid>/hadoop-yarn/<containerid>
+          // but it is /hadoop-yarn/<containerid> in the cgroups hierarchy
+          String cgroupPath = m.group(3);
+
+          if (cgroupPath != null) {
+            String cgroup =
+                new File(cgroupPath).toPath().getFileName().toString();
+            result[0] = cGroupsHandler.getRelativePathForCGroup(cgroup);
+          } else {
+            LOG.warn("Invalid cgroup path for " + pidCgroupFile);
+          }
+          return Result.Exit;
+        }
+      } else {
+        LOG.warn(
+            "Unexpected: cgroup file is not in the expected format"
+                + " for process with pid " + pid);
+      }
+      return Result.Continue;
+    });
+    if (result[0] == null) {
+      throw new YarnException(controller.getName() + " CGroup for pid " + pid +
+          " not found " + pidCgroupFile);
+    }
+    return result[0];
+  }
+
+  private void processFile(File file, Function<String, Result> processLine)
+      throws YarnException {
+    // Read "procfsDir/<pid>/stat" file - typically /proc/<pid>/stat
+    try (InputStreamReader fReader = new InputStreamReader(
+        new FileInputStream(file), Charset.forName("UTF-8"))) {
+      try (BufferedReader in = new BufferedReader(fReader)) {
+        try {
+          String str;
+          while ((str = in.readLine()) != null) {
+            Result result = processLine.apply(str);
+            if (result == Result.Exit) {
+              return;
+            }
+          }
+        } catch (IOException io) {
+          throw new YarnException("Error reading the stream " + io, io);
+        }
+      }
+    } catch (IOException f) {
+      throw new YarnException("The process vanished in the interim " + pid, f);
+    }
+  }
+
+  void setCGroupFilePaths() throws YarnException {
+    if (cGroupsHandler == null) {
+      throw new YarnException("CGroups handler is not initialized");
+    }
+    File cpuDir = new File(
+        cGroupsHandler.getControllerPath(
+            CGroupsHandler.CGroupController.CPUACCT),
+        getCGroupRelativePath(CGroupsHandler.CGroupController.CPUACCT));
+    File memDir = new File(
+        cGroupsHandler.getControllerPath(
+            CGroupsHandler.CGroupController.MEMORY),
+        getCGroupRelativePath(CGroupsHandler.CGroupController.MEMORY));
+    cpuStat = new File(cpuDir, CPU_STAT);
+    memStat = new File(memDir, MEM_STAT);
+    memswStat = new File(memDir, MEMSW_STAT);
+  }
+
+}

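getCumulativeCpuTime() above multiplies the summed user and system jiffies from cpuacct.stat by the jiffy length. A worked sketch of that arithmetic, using the same values the new unit test asserts (user 5415, system 3632, 10 ms jiffies); the class name is hypothetical:

import java.math.BigInteger;

public class CpuacctStatSketch {
  public static void main(String[] args) {
    // Mirrors readTotalProcessJiffies() + getCumulativeCpuTime():
    // (5415 + 3632) jiffies * 10 ms/jiffy = 90470 ms.
    BigInteger userJiffies = new BigInteger("5415");
    BigInteger systemJiffies = new BigInteger("3632");
    long jiffyLengthMs = 10;
    long cumulativeCpuMs =
        userJiffies.add(systemJiffies).longValue() * jiffyLengthMs;
    System.out.println(cumulativeCpuMs); // prints 90470
  }
}
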
+ 108 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CombinedResourceCalculator.java

@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree;
+import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
+
+/**
+ * CombinedResourceCalculator is a resource calculator that uses cgroups but
+ * it is backward compatible with procfs in terms of virtual memory usage.
+ */
+public class CombinedResourceCalculator  extends ResourceCalculatorProcessTree {
+  protected static final Log LOG = LogFactory
+      .getLog(CombinedResourceCalculator.class);
+  private ProcfsBasedProcessTree procfs;
+  private CGroupsResourceCalculator cgroup;
+
+  public CombinedResourceCalculator(String pid) {
+    super(pid);
+    procfs = new ProcfsBasedProcessTree(pid);
+    cgroup = new CGroupsResourceCalculator(pid);
+  }
+
+  @Override
+  public void initialize() throws YarnException {
+    procfs.initialize();
+    cgroup.initialize();
+  }
+
+  @Override
+  public void updateProcessTree() {
+    procfs.updateProcessTree();
+    cgroup.updateProcessTree();
+  }
+
+  @Override
+  public String getProcessTreeDump() {
+    return procfs.getProcessTreeDump();
+  }
+
+  @Override
+  public float getCpuUsagePercent() {
+    float cgroupUsage = cgroup.getCpuUsagePercent();
+    if (LOG.isDebugEnabled()) {
+      float procfsUsage = procfs.getCpuUsagePercent();
+      LOG.debug("CPU Comparison:" + procfsUsage + " " + cgroupUsage);
+      LOG.debug("Jiffy Comparison:" +
+          procfs.getCumulativeCpuTime() + " " +
+          cgroup.getCumulativeCpuTime());
+    }
+
+    return cgroupUsage;
+  }
+
+  @Override
+  public boolean checkPidPgrpidForMatch() {
+    return procfs.checkPidPgrpidForMatch();
+  }
+
+  @Override
+  public long getCumulativeCpuTime() {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("CPU Comparison:" +
+          procfs.getCumulativeCpuTime() + " " +
+          cgroup.getCumulativeCpuTime());
+    }
+    return cgroup.getCumulativeCpuTime();
+  }
+
+  @Override
+  public long getRssMemorySize(int olderThanAge) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("MEM Comparison:" +
+          procfs.getRssMemorySize(olderThanAge) + " " +
+          cgroup.getRssMemorySize(olderThanAge));
+    }
+    return cgroup.getRssMemorySize(olderThanAge);
+  }
+
+  @Override
+  public long getVirtualMemorySize(int olderThanAge) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("VMEM Comparison:" +
+          procfs.getVirtualMemorySize(olderThanAge) + " " +
+          cgroup.getVirtualMemorySize(olderThanAge));
+    }
+    return procfs.getVirtualMemorySize(olderThanAge);
+  }
+}

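A minimal usage sketch for the class above, assuming cgroups are mounted and the Linux container executor has been initialized; the pid argument and wrapper class are hypothetical:

import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CombinedResourceCalculator;

public class CombinedCalculatorSketch {
  public static void report(String containerPid) throws YarnException {
    // CPU and physical memory come from cgroups, virtual memory from procfs,
    // matching the overrides in CombinedResourceCalculator.
    CombinedResourceCalculator calc =
        new CombinedResourceCalculator(containerPid);
    calc.initialize();          // throws YarnException if cgroups are unavailable
    calc.updateProcessTree();   // refresh both procfs and cgroups readings
    long rssBytes = calc.getRssMemorySize(0);       // cgroups usage_in_bytes
    long vmemBytes = calc.getVirtualMemorySize(0);  // procfs virtual memory
    long cpuMs = calc.getCumulativeCpuTime();       // cgroups cpuacct.stat
    System.out.println(rssBytes + " " + vmemBytes + " " + cpuMs);
  }
}
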
+ 33 - 10
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java

@@ -25,7 +25,6 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
@@ -101,7 +100,27 @@ public class ResourceHandlerModule {
     return cGroupsHandler;
   }
 
-  private static CGroupsCpuResourceHandlerImpl getCGroupsCpuResourceHandler(
+  public static NetworkPacketTaggingHandlerImpl
+      getNetworkResourceHandler() {
+    return networkPacketTaggingHandlerImpl;
+  }
+
+  public static DiskResourceHandler
+      getDiskResourceHandler() {
+    return cGroupsBlkioResourceHandler;
+  }
+
+  public static MemoryResourceHandler
+      getMemoryResourceHandler() {
+    return cGroupsMemoryResourceHandler;
+  }
+
+  public static CpuResourceHandler
+      getCpuResourceHandler() {
+    return cGroupsCpuResourceHandler;
+  }
+
+  private static CGroupsCpuResourceHandlerImpl initCGroupsCpuResourceHandler(
       Configuration conf) throws ResourceHandlerException {
     boolean cgroupsCpuEnabled =
         conf.getBoolean(YarnConfiguration.NM_CPU_RESOURCE_ENABLED,
@@ -150,7 +169,7 @@ public class ResourceHandlerModule {
     }
   }
 
-  public static ResourceHandler getNetworkResourceHandler(Configuration conf)
+  public static ResourceHandler initNetworkResourceHandler(Configuration conf)
         throws ResourceHandlerException {
     boolean useNetworkTagHandler = conf.getBoolean(
         YarnConfiguration.NM_NETWORK_TAG_HANDLER_ENABLED,
@@ -181,12 +200,12 @@ public class ResourceHandlerModule {
   }
 
   public static OutboundBandwidthResourceHandler
-      getOutboundBandwidthResourceHandler(Configuration conf)
+      initOutboundBandwidthResourceHandler(Configuration conf)
       throws ResourceHandlerException {
     return getTrafficControlBandwidthHandler(conf);
   }
 
-  public static DiskResourceHandler getDiskResourceHandler(Configuration conf)
+  public static DiskResourceHandler initDiskResourceHandler(Configuration conf)
       throws ResourceHandlerException {
     if (conf.getBoolean(YarnConfiguration.NM_DISK_RESOURCE_ENABLED,
         YarnConfiguration.DEFAULT_NM_DISK_RESOURCE_ENABLED)) {
@@ -210,7 +229,7 @@ public class ResourceHandlerModule {
     return cGroupsBlkioResourceHandler;
   }
 
-  public static MemoryResourceHandler getMemoryResourceHandler(
+  public static MemoryResourceHandler initMemoryResourceHandler(
       Configuration conf) throws ResourceHandlerException {
     if (conf.getBoolean(YarnConfiguration.NM_MEMORY_RESOURCE_ENABLED,
         YarnConfiguration.DEFAULT_NM_MEMORY_RESOURCE_ENABLED)) {
@@ -246,10 +265,14 @@ public class ResourceHandlerModule {
       throws ResourceHandlerException {
     ArrayList<ResourceHandler> handlerList = new ArrayList<>();
 
-    addHandlerIfNotNull(handlerList, getNetworkResourceHandler(conf));
-    addHandlerIfNotNull(handlerList, getDiskResourceHandler(conf));
-    addHandlerIfNotNull(handlerList, getMemoryResourceHandler(conf));
-    addHandlerIfNotNull(handlerList, getCGroupsCpuResourceHandler(conf));
+    addHandlerIfNotNull(handlerList,
+        initNetworkResourceHandler(conf));
+    addHandlerIfNotNull(handlerList,
+        initDiskResourceHandler(conf));
+    addHandlerIfNotNull(handlerList,
+        initMemoryResourceHandler(conf));
+    addHandlerIfNotNull(handlerList,
+        initCGroupsCpuResourceHandler(conf));
     addHandlersFromConfiguredResourcePlugins(handlerList, conf, nmContext);
     resourceHandlerChain = new ResourceHandlerChain(handlerList);
   }

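The rename from getX(conf) to initX(conf) above frees the no-argument getters to act as probes for handlers that the node manager has already bootstrapped, which is what CGroupsResourceCalculator.isAvailable() relies on. A minimal sketch of that probe, assuming it lives in (or imports) the same resources package; the class name is hypothetical:

public class ResourceHandlerProbeSketch {
  static boolean cgroupsUsable() {
    // The getters return null until bootstrap has initialized the
    // corresponding handlers, so a null check doubles as an availability probe.
    return ResourceHandlerModule.getCGroupsHandler() != null
        && ResourceHandlerModule.getCpuResourceHandler() != null
        && ResourceHandlerModule.getMemoryResourceHandler() != null;
  }
}
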
+ 29 - 15
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java

@@ -58,6 +58,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.Contai
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeContext;
 
+import java.net.InetAddress;
+import java.net.UnknownHostException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.ArrayList;
@@ -230,7 +232,6 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
   private PrivilegedOperationExecutor privilegedOperationExecutor;
   private Set<String> allowedNetworks = new HashSet<>();
   private String defaultNetwork;
-  private String cgroupsRootDirectory;
   private CGroupsHandler cGroupsHandler;
   private AccessControlList privilegedContainersAcl;
   private boolean enableUserReMapping;
@@ -290,7 +291,6 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
       LOG.info("cGroupsHandler is null - cgroups not in use.");
     } else {
       this.cGroupsHandler = cGroupsHandler;
-      this.cgroupsRootDirectory = cGroupsHandler.getCGroupMountPath();
     }
   }
 
@@ -437,7 +437,6 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
       throws ContainerExecutionException {
     DockerVolumeCommand dockerVolumeInspectCommand = new DockerVolumeCommand(
         DockerVolumeCommand.VOLUME_LS_SUB_COMMAND);
-    dockerVolumeInspectCommand.setFormat("{{.Name}},{{.Driver}}");
     String output = runDockerVolumeCommand(dockerVolumeInspectCommand,
         container);
 
@@ -450,13 +449,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
 
     for (String line : output.split("\n")) {
       line = line.trim();
-      String[] arr = line.split(",");
-      String v = arr[0].trim();
-      String d = null;
-      if (arr.length > 1) {
-        d = arr[1].trim();
-      }
-      if (d != null && volumeName.equals(v) && driverName.equals(d)) {
+      if (line.contains(volumeName) && line.contains(driverName)) {
         // Good we found it.
         LOG.info(
             "Docker volume-name=" + volumeName + " driver-name=" + driverName
@@ -796,11 +789,6 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
     setHostname(runCommand, containerIdStr, hostname);
     runCommand.setCapabilities(capabilities);
 
-    if(cgroupsRootDirectory != null) {
-      runCommand.addReadOnlyMountLocation(cgroupsRootDirectory,
-          cgroupsRootDirectory, false);
-    }
-
     List<String> allDirs = new ArrayList<>(containerLocalDirs);
     allDirs.addAll(filecacheDirs);
     allDirs.add(containerWorkDir.toString());
@@ -1020,6 +1008,32 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
       }
       String ips = output.substring(0, index).trim();
       String host = output.substring(index+1).trim();
+      if (ips.equals("")) {
+        String network;
+        try {
+          network = container.getLaunchContext().getEnvironment()
+              .get("YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK");
+          if (network == null || network.isEmpty()) {
+            network = defaultNetwork;
+          }
+        } catch (NullPointerException e) {
+          network = defaultNetwork;
+        }
+        boolean useHostNetwork = network.equalsIgnoreCase("host");
+        if (useHostNetwork) {
+          // Report back node manager IP in the event where docker
+          // inspect reports no IP address.  This is for bridging a gap for
+          // docker environment to run with host network.
+          InetAddress address;
+          try {
+            address = InetAddress.getLocalHost();
+            ips = address.getHostAddress();
+          } catch (UnknownHostException e) {
+            LOG.error("Can not determine IP for container:"
+                + containerId);
+          }
+        }
+      }
       String[] ipAndHost = new String[2];
       ipAndHost[0] = ips;
       ipAndHost[1] = host;

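The node-manager IP fallback above only triggers for containers that run on the Docker host network. A minimal sketch of how a launch context would request that, using the YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK variable read in the hunk; the class name is hypothetical and other launch-context settings are omitted:

import java.util.HashMap;
import java.util.Map;

public class HostNetworkEnvSketch {
  public static Map<String, String> containerEnv() {
    // With host networking, docker inspect may report no IP address, so the
    // runtime falls back to the node manager's own address as shown above.
    Map<String, String> env = new HashMap<>();
    env.put("YARN_CONTAINER_RUNTIME_DOCKER_CONTAINER_NETWORK", "host");
    return env;
  }
}
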
+ 22 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java

@@ -215,15 +215,25 @@ public class ContainersMonitorImpl extends AbstractService implements
         YarnConfiguration.DEFAULT_NM_CONTAINER_MONITOR_ENABLED);
   }
 
+  /**
+   * Get the best process tree calculator.
+   * @param pId container process id
+   * @return process tree calculator
+   */
+  private ResourceCalculatorProcessTree
+      getResourceCalculatorProcessTree(String pId) {
+    return ResourceCalculatorProcessTree.
+        getResourceCalculatorProcessTree(
+            pId, processTreeClass, conf);
+  }
+
   private boolean isResourceCalculatorAvailable() {
     if (resourceCalculatorPlugin == null) {
       LOG.info("ResourceCalculatorPlugin is unavailable on this system. " + this
           .getClass().getName() + " is disabled.");
       return false;
     }
-    if (ResourceCalculatorProcessTree
-        .getResourceCalculatorProcessTree("0", processTreeClass, conf)
-        == null) {
+    if (getResourceCalculatorProcessTree("0") == null) {
       LOG.info("ResourceCalculatorProcessTree is unavailable on this system. "
           + this.getClass().getName() + " is disabled.");
       return false;
@@ -535,9 +545,7 @@ public class ContainersMonitorImpl extends AbstractService implements
             LOG.debug("Tracking ProcessTree " + pId + " for the first time");
           }
           ResourceCalculatorProcessTree pt =
-                  ResourceCalculatorProcessTree.
-                        getResourceCalculatorProcessTree(
-                            pId, processTreeClass, conf);
+              getResourceCalculatorProcessTree(pId);
           ptInfo.setPid(pId);
           ptInfo.setProcessTree(pt);
 
@@ -599,11 +607,14 @@ public class ContainersMonitorImpl extends AbstractService implements
       long pmemLimit = ptInfo.getPmemLimit();
       if (AUDITLOG.isDebugEnabled()) {
         AUDITLOG.debug(String.format(
-                "Memory usage of ProcessTree %s for container-id %s: ",
-                pId, containerId.toString()) +
-                formatUsageString(
-                      currentVmemUsage, vmemLimit,
-                      currentPmemUsage, pmemLimit));
+            "Resource usage of ProcessTree %s for container-id %s:" +
+                " %s CPU:%f CPU/core:%f",
+            pId, containerId.toString(),
+            formatUsageString(
+                currentVmemUsage, vmemLimit,
+                currentPmemUsage, pmemLimit),
+            cpuUsagePercentPerCore,
+            cpuUsageTotalCoresPercentage));
       }
 
       // Add resource utilization for this container

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/timelineservice/NMTimelinePublisher.java

@@ -100,6 +100,7 @@ public class NMTimelinePublisher extends CompositeService {
     this.nmLoginUGI =  UserGroupInformation.isSecurityEnabled() ?
         UserGroupInformation.getLoginUser() :
         UserGroupInformation.getCurrentUser();
+    LOG.info("Initialized NMTimelinePublisher UGI to " + nmLoginUGI);
     super.serviceInit(conf);
   }
 

+ 45 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsMemoryResourceHandlerImpl.java

@@ -148,6 +148,51 @@ public class TestCGroupsMemoryResourceHandlerImpl {
         args.get(0));
   }
 
+  @Test
+  public void testPreStartNonEnforced() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false);
+    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);
+    conf.setBoolean(YarnConfiguration.NM_MEMORY_RESOURCE_ENFORCED, false);
+    cGroupsMemoryResourceHandler.bootstrap(conf);
+    String id = "container_01_01";
+    String path = "test-path/" + id;
+    ContainerId mockContainerId = mock(ContainerId.class);
+    when(mockContainerId.toString()).thenReturn(id);
+    Container mockContainer = mock(Container.class);
+    when(mockContainer.getContainerId()).thenReturn(mockContainerId);
+    when(mockCGroupsHandler
+        .getPathForCGroupTasks(CGroupsHandler.CGroupController.MEMORY, id))
+        .thenReturn(path);
+    int memory = 1024;
+    when(mockContainer.getResource())
+        .thenReturn(Resource.newInstance(memory, 1));
+    List<PrivilegedOperation> ret =
+        cGroupsMemoryResourceHandler.preStart(mockContainer);
+    verify(mockCGroupsHandler, times(1))
+        .createCGroup(CGroupsHandler.CGroupController.MEMORY, id);
+    verify(mockCGroupsHandler, times(0))
+        .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
+            CGroupsHandler.CGROUP_PARAM_MEMORY_HARD_LIMIT_BYTES,
+            String.valueOf(memory) + "M");
+    verify(mockCGroupsHandler, times(0))
+        .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
+            CGroupsHandler.CGROUP_PARAM_MEMORY_SOFT_LIMIT_BYTES,
+            String.valueOf((int) (memory * 0.9)) + "M");
+    verify(mockCGroupsHandler, times(0))
+        .updateCGroupParam(CGroupsHandler.CGroupController.MEMORY, id,
+            CGroupsHandler.CGROUP_PARAM_MEMORY_SWAPPINESS, String.valueOf(0));
+    Assert.assertNotNull(ret);
+    Assert.assertEquals(1, ret.size());
+    PrivilegedOperation op = ret.get(0);
+    Assert.assertEquals(PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
+        op.getOperationType());
+    List<String> args = op.getArguments();
+    Assert.assertEquals(1, args.size());
+    Assert.assertEquals(PrivilegedOperation.CGROUP_ARG_PREFIX + path,
+        args.get(0));
+  }
+
   @Test
   public void testReacquireContainer() throws Exception {
     ContainerId containerIdMock = mock(ContainerId.class);

+ 274 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsResourceCalculator.java

@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.util.ControlledClock;
+import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+
+import static org.mockito.Mockito.*;
+
+/**
+ * Unit test for CGroupsResourceCalculator.
+ */
+public class TestCGroupsResourceCalculator {
+
+  private ControlledClock clock = new ControlledClock();
+  private CGroupsHandler cGroupsHandler = mock(CGroupsHandler.class);
+  private String basePath = "/tmp/" + this.getClass().getName();
+
+  public TestCGroupsResourceCalculator() {
+    when(cGroupsHandler.getRelativePathForCGroup("container_1"))
+        .thenReturn("/yarn/container_1");
+    when(cGroupsHandler.getRelativePathForCGroup("")).thenReturn("/yarn/");
+  }
+
+  @Test(expected = YarnException.class)
+  public void testPidNotFound() throws Exception {
+    CGroupsResourceCalculator calculator =
+        new CGroupsResourceCalculator(
+            "1234", ".", cGroupsHandler, clock, 10);
+    calculator.setCGroupFilePaths();
+    Assert.assertEquals("Expected exception", null, calculator);
+  }
+
+  @Test(expected = YarnException.class)
+  public void testNoMemoryCGgroupMount() throws Exception {
+    File procfs = new File(basePath + "/1234");
+    Assert.assertTrue("Setup error", procfs.mkdirs());
+    try {
+      FileUtils.writeStringToFile(
+          new File(procfs, CGroupsResourceCalculator.CGROUP),
+          "7:devices:/yarn/container_1\n" +
+              "6:cpuacct,cpu:/yarn/container_1\n" +
+              "5:pids:/yarn/container_1\n");
+      CGroupsResourceCalculator calculator =
+          new CGroupsResourceCalculator(
+              "1234", basePath,
+              cGroupsHandler, clock, 10);
+      calculator.setCGroupFilePaths();
+      Assert.assertEquals("Expected exception", null, calculator);
+    } finally {
+      FileUtils.deleteDirectory(new File(basePath));
+    }
+  }
+
+  @Test
+  public void testCGgroupNotFound() throws Exception {
+    File procfs = new File(basePath + "/1234");
+    Assert.assertTrue("Setup error", procfs.mkdirs());
+    try {
+      FileUtils.writeStringToFile(
+          new File(procfs, CGroupsResourceCalculator.CGROUP),
+          "7:devices:/yarn/container_1\n" +
+              "6:cpuacct,cpu:/yarn/container_1\n" +
+              "5:pids:/yarn/container_1\n" +
+              "4:memory:/yarn/container_1\n");
+
+      CGroupsResourceCalculator calculator =
+          new CGroupsResourceCalculator(
+              "1234", basePath,
+              cGroupsHandler, clock, 10);
+      calculator.setCGroupFilePaths();
+      calculator.updateProcessTree();
+      Assert.assertEquals("cgroups should be missing",
+          (long)ResourceCalculatorProcessTree.UNAVAILABLE,
+          calculator.getRssMemorySize(0));
+    } finally {
+      FileUtils.deleteDirectory(new File(basePath));
+    }
+  }
+
+  @Test
+  public void testCPUParsing() throws Exception {
+    File cgcpuacctDir =
+        new File(basePath + "/cgcpuacct");
+    File cgcpuacctContainerDir =
+        new File(cgcpuacctDir, "/yarn/container_1");
+    File procfs = new File(basePath + "/1234");
+    when(cGroupsHandler.getControllerPath(
+        CGroupsHandler.CGroupController.CPUACCT)).
+        thenReturn(cgcpuacctDir.getAbsolutePath());
+    Assert.assertTrue("Setup error", procfs.mkdirs());
+    Assert.assertTrue("Setup error", cgcpuacctContainerDir.mkdirs());
+    try {
+      FileUtils.writeStringToFile(
+          new File(procfs, CGroupsResourceCalculator.CGROUP),
+          "7:devices:/yarn/container_1\n" +
+              "6:cpuacct,cpu:/yarn/container_1\n" +
+              "5:pids:/yarn/container_1\n" +
+              "4:memory:/yarn/container_1\n");
+      FileUtils.writeStringToFile(
+          new File(cgcpuacctContainerDir, CGroupsResourceCalculator.CPU_STAT),
+          "Can you handle this?\n" +
+              "user 5415\n" +
+              "system 3632");
+      CGroupsResourceCalculator calculator =
+          new CGroupsResourceCalculator(
+              "1234", basePath,
+              cGroupsHandler, clock, 10);
+      calculator.setCGroupFilePaths();
+      calculator.updateProcessTree();
+      Assert.assertEquals("Incorrect CPU usage",
+          90470,
+          calculator.getCumulativeCpuTime());
+    } finally {
+      FileUtils.deleteDirectory(new File(basePath));
+    }
+  }
+
+  @Test
+  public void testMemoryParsing() throws Exception {
+    File cgcpuacctDir =
+        new File(basePath + "/cgcpuacct");
+    File cgcpuacctContainerDir =
+        new File(cgcpuacctDir, "/yarn/container_1");
+    File cgmemoryDir =
+        new File(basePath + "/memory");
+    File cgMemoryContainerDir =
+        new File(cgmemoryDir, "/yarn/container_1");
+    File procfs = new File(basePath + "/1234");
+    when(cGroupsHandler.getControllerPath(
+        CGroupsHandler.CGroupController.MEMORY)).
+        thenReturn(cgmemoryDir.getAbsolutePath());
+    Assert.assertTrue("Setup error", procfs.mkdirs());
+    Assert.assertTrue("Setup error", cgcpuacctContainerDir.mkdirs());
+    Assert.assertTrue("Setup error", cgMemoryContainerDir.mkdirs());
+    try {
+      FileUtils.writeStringToFile(
+          new File(procfs, CGroupsResourceCalculator.CGROUP),
+              "6:cpuacct,cpu:/yarn/container_1\n" +
+              "4:memory:/yarn/container_1\n");
+      FileUtils.writeStringToFile(
+          new File(cgMemoryContainerDir, CGroupsResourceCalculator.MEM_STAT),
+          "418496512\n");
+
+      CGroupsResourceCalculator calculator =
+          new CGroupsResourceCalculator(
+              "1234", basePath,
+              cGroupsHandler, clock, 10);
+      calculator.setCGroupFilePaths();
+
+      calculator.updateProcessTree();
+      // Test the case where memsw is not available (Ubuntu)
+      Assert.assertEquals("Incorrect memory usage",
+          418496512,
+          calculator.getRssMemorySize());
+      Assert.assertEquals("Incorrect swap usage",
+          (long)ResourceCalculatorProcessTree.UNAVAILABLE,
+          calculator.getVirtualMemorySize());
+
+      // Test the case where memsw is available
+      FileUtils.writeStringToFile(
+          new File(cgMemoryContainerDir, CGroupsResourceCalculator.MEMSW_STAT),
+          "418496513\n");
+      calculator.updateProcessTree();
+      Assert.assertEquals("Incorrect swap usage",
+          418496513,
+          calculator.getVirtualMemorySize());
+    } finally {
+      FileUtils.deleteDirectory(new File(basePath));
+    }
+  }
+
+  @Test
+  public void testCPUParsingRoot() throws Exception {
+    File cgcpuacctDir =
+        new File(basePath + "/cgcpuacct");
+    File cgcpuacctRootDir =
+        new File(cgcpuacctDir, "/yarn");
+    when(cGroupsHandler.getControllerPath(
+        CGroupsHandler.CGroupController.CPUACCT)).
+        thenReturn(cgcpuacctDir.getAbsolutePath());
+    Assert.assertTrue("Setup error", cgcpuacctRootDir.mkdirs());
+    try {
+      FileUtils.writeStringToFile(
+          new File(cgcpuacctRootDir, CGroupsResourceCalculator.CPU_STAT),
+              "user 5415\n" +
+              "system 3632");
+      CGroupsResourceCalculator calculator =
+          new CGroupsResourceCalculator(
+              null, basePath,
+              cGroupsHandler, clock, 10);
+      calculator.setCGroupFilePaths();
+      calculator.updateProcessTree();
+      Assert.assertEquals("Incorrect CPU usage",
+          90470,
+          calculator.getCumulativeCpuTime());
+    } finally {
+      FileUtils.deleteDirectory(new File(basePath));
+    }
+  }
+
+  @Test
+  public void testMemoryParsingRoot() throws Exception {
+    File cgcpuacctDir =
+        new File(basePath + "/cgcpuacct");
+    File cgcpuacctRootDir =
+        new File(cgcpuacctDir, "/yarn");
+    File cgmemoryDir =
+        new File(basePath + "/memory");
+    File cgMemoryRootDir =
+        new File(cgmemoryDir, "/yarn");
+    File procfs = new File(basePath + "/1234");
+    when(cGroupsHandler.getControllerPath(
+        CGroupsHandler.CGroupController.MEMORY)).
+        thenReturn(cgmemoryDir.getAbsolutePath());
+    Assert.assertTrue("Setup error", procfs.mkdirs());
+    Assert.assertTrue("Setup error", cgcpuacctRootDir.mkdirs());
+    Assert.assertTrue("Setup error", cgMemoryRootDir.mkdirs());
+    try {
+      FileUtils.writeStringToFile(
+          new File(cgMemoryRootDir, CGroupsResourceCalculator.MEM_STAT),
+          "418496512\n");
+
+      CGroupsResourceCalculator calculator =
+          new CGroupsResourceCalculator(
+              null, basePath,
+              cGroupsHandler, clock, 10);
+      calculator.setCGroupFilePaths();
+
+      calculator.updateProcessTree();
+
+      // Test the case where memsw is not available (Ubuntu)
+      Assert.assertEquals("Incorrect memory usage",
+          418496512,
+          calculator.getRssMemorySize());
+      Assert.assertEquals("Incorrect swap usage",
+          (long)ResourceCalculatorProcessTree.UNAVAILABLE,
+          calculator.getVirtualMemorySize());
+
+      // Test the case where memsw is available
+      FileUtils.writeStringToFile(
+          new File(cgMemoryRootDir, CGroupsResourceCalculator.MEMSW_STAT),
+          "418496513\n");
+      calculator.updateProcessTree();
+      Assert.assertEquals("Incorrect swap usage",
+          418496513,
+          calculator.getVirtualMemorySize());
+    } finally {
+      FileUtils.deleteDirectory(new File(basePath));
+    }
+  }
+}

Some files were not shown because too many files changed in this diff