Explorar o código

merge from trunk r1598783

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/fs-encryption@1598785 13f79535-47bb-0310-9956-ffa450edef68
Charles Lamb hai 11 anos
pai
achega
871616b993
Modificáronse 81 ficheiros con 3023 adicións e 390 borrados
  1. 11 3
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 11 0
      hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh
  3. 3 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  4. 0 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
  5. 6 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
  6. 11 12
      hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
  7. 3 6
      hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSCacheKeyProvider.java
  8. 6 1
      hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java
  9. 9 7
      hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java
  10. 7 18
      hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/IdUserGroup.java
  11. 2 5
      hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java
  12. 12 30
      hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java
  13. 3 6
      hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/TestNfsExports.java
  14. 9 7
      hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/nfs3/TestIdUserGroup.java
  15. 5 0
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java
  16. 99 0
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java
  17. 57 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java
  18. 54 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfiguration.java
  19. 3 3
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/Mountd.java
  20. 13 15
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java
  21. 4 4
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java
  22. 8 13
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java
  23. 2 1
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java
  24. 6 6
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java
  25. 6 7
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java
  26. 28 25
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
  27. 13 12
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java
  28. 2 2
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java
  29. 5 4
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java
  30. 2 2
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java
  31. 6 6
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java
  32. 4 4
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestExportsTable.java
  33. 20 20
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestOpenFileCtxCache.java
  34. 42 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java
  35. 6 6
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java
  36. 2 2
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/resources/core-site.xml
  37. 11 1
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  38. 0 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  39. 4 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java
  40. 51 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  41. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js
  42. 7 7
      hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
  43. 12 0
      hadoop-yarn-project/CHANGES.txt
  44. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  45. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java
  46. 17 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
  47. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestSubViews.java
  48. 8 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/view/TestHtmlPage.java
  49. 5 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml
  50. 3 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
  51. 49 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
  52. 12 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
  53. 77 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java
  54. 0 10
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java
  55. 156 29
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java
  56. 20 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java
  57. 111 33
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java
  58. 3 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java
  59. 43 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceRecoveredEvent.java
  60. 377 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
  61. 74 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
  62. 163 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
  63. 31 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto
  64. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
  65. 3 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
  66. 30 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
  67. 6 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
  68. 3 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
  69. 47 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java
  70. 310 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java
  71. 270 14
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java
  72. 4 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java
  73. 171 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
  74. 407 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
  75. 3 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
  76. 7 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
  77. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java
  78. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java
  79. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java
  80. 11 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
  81. 23 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java

+ 11 - 3
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -125,9 +125,6 @@ Trunk (Unreleased)
     HADOOP-10342. Add a new method to UGI to use a Kerberos login subject to
     build a new UGI. (Larry McCay via omalley)
 
-    HADOOP-9968. Makes ProxyUsers to work with NetGroups (Benoy Antony via 
-    ddas)
-
     HADOOP-10237. JavaKeyStoreProvider needs to set keystore permissions 
     correctly. (Larry McCay via omalley)
 
@@ -337,6 +334,11 @@ Trunk (Unreleased)
     HADOOP-10625. Trim configuration names when putting/getting them
     to properties. (Wangda Tan via xgong)
 
+    HADOOP-10645. TestKMS fails because race condition writing acl files. (tucu)
+
+    HADOOP-10611. KMS, keyVersion name should not be assumed to be 
+    keyName@versionNumber. (tucu)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -419,6 +421,9 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10618. Remove SingleNodeSetup.apt.vm. (Akira Ajisaka via
     Arpit Agarwal)
 
+    HADOOP-9968. Makes ProxyUsers to work with NetGroups (Benoy Antony via 
+    ddas)
+
     HADOOP-10448. Support pluggable mechanism to specify proxy user settings.
     (Benoy Antony via Arpit Agarwal)
 
@@ -519,6 +524,9 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10639. FileBasedKeyStoresFactory initialization is not using default
     for SSL_REQUIRE_CLIENT_CERT_KEY. (tucu)
 
+    HADOOP-10638. Updating hadoop-daemon.sh to work as expected when nfs is
+    started as a privileged user. (Manikandan Narayanaswamy via atm)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 11 - 0
hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh

@@ -87,6 +87,14 @@ if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_
   starting_secure_dn="true"
 fi
 
+#Determine if we're starting a privileged NFS, if so, redefine the appropriate variables
+if [ "$command" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
+    export HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
+    export HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
+    export HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
+    starting_privileged_nfs="true"
+fi
+
 if [ "$HADOOP_IDENT_STRING" = "" ]; then
   export HADOOP_IDENT_STRING="$USER"
 fi
@@ -162,6 +170,9 @@ case $startStop in
       echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
       # capture the ulimit info for the appropriate user
       su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
+    elif [ "true" = "$starting_privileged_nfs" ]; then
+        echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
+        su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
     else
       echo "ulimit -a for user $USER" >> $log
       ulimit -a >> $log 2>&1

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -423,7 +423,9 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
       new DeprecationDelta("fs.default.name", 
         CommonConfigurationKeys.FS_DEFAULT_NAME_KEY),
       new DeprecationDelta("dfs.umaskmode",
-        CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY)
+        CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY),
+      new DeprecationDelta("dfs.nfs.exports.allowed.hosts",
+          CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY)
     };
 
   /**

+ 0 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java

@@ -517,8 +517,4 @@ public class KMSClientProvider extends KeyProvider {
     // the server should not keep in memory state on behalf of clients either.
   }
 
-  @VisibleForTesting
-  public static String buildVersionName(String name, int version) {
-    return KeyProvider.buildVersionName(name, version);
-  }
 }

+ 6 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -265,4 +265,10 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   public static final boolean RPC_METRICS_QUANTILE_ENABLE_DEFAULT = false;
   public static final String  RPC_METRICS_PERCENTILES_INTERVALS_KEY =
       "rpc.metrics.percentiles.intervals";
+  
+  /** Allowed hosts for nfs exports */
+  public static final String NFS_EXPORTS_ALLOWED_HOSTS_SEPARATOR = ";";
+  public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY = "nfs.exports.allowed.hosts";
+  public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT = "* rw";
+
 }

+ 11 - 12
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -1309,18 +1309,17 @@
 </property>
 
 <property>
-  <name>nfs3.server.port</name>
-  <value>2049</value>
-  <description>
-      Specify the port number used by Hadoop NFS.
-  </description>
-</property>
-
-<property>
-  <name>nfs3.mountd.port</name>
-  <value>4242</value>
-  <description>
-      Specify the port number used by Hadoop mount daemon.
+  <name>nfs.exports.allowed.hosts</name>
+  <value>* rw</value>
+  <description>
+    By default, the export can be mounted by any client. The value string 
+    contains machine name and access privilege, separated by whitespace 
+    characters. The machine name format can be a single host, a Java regular 
+    expression, or an IPv4 address. The access privilege uses rw or ro to 
+    specify read/write or read-only access of the machines to exports. If the 
+    access privilege is not provided, the default is read-only. Entries are separated by ";".
+    For example: "192.168.0.0/22 rw ; host.*\.example\.com ; host1.test.org ro;".
+    Only the NFS gateway needs to restart after this property is updated. 
   </description>
 </property>
 

+ 3 - 6
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSCacheKeyProvider.java

@@ -135,14 +135,11 @@ public class KMSCacheKeyProvider extends KeyProvider {
 
   @Override
   public void deleteKey(String name) throws IOException {
-    Metadata metadata = provider.getMetadata(name);
-    List<String> versions = new ArrayList<String>(metadata.getVersions());
-    for (int i = 0; i < metadata.getVersions(); i++) {
-      versions.add(KeyProvider.buildVersionName(name, i));
-    }
     provider.deleteKey(name);
     currentKeyCache.invalidate(name);
-    keyVersionCache.invalidateAll(versions);
+    // invalidating all key versions as we don't know which ones belonged to the
+    // deleted key
+    keyVersionCache.invalidateAll();
   }
 
   @Override

+ 6 - 1
hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java

@@ -577,7 +577,9 @@ public class TestKMS {
               Assert.fail(ex.toString());
             }
             try {
-              kp.getKeyVersion(KMSClientProvider.buildVersionName("k", 0));
+              // we are using JavaKeyStoreProvider for testing, so we know how
+              // the keyversion is created.
+              kp.getKeyVersion("k@0");
               Assert.fail();
             } catch (AuthorizationException ex) {
               //NOP
@@ -717,6 +719,9 @@ public class TestKMS {
           }
         });
 
+        //stop the reloader, to avoid running while we are writing the new file
+        KMSWebApp.getACLs().stopReloader();
+
         // test ACL reloading
         Thread.sleep(10); // to ensure the ACLs file modifiedTime is newer
         conf.set(KMSACLs.Type.CREATE.getConfigKey(), "foo");

+ 9 - 7
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java

@@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.commons.net.util.SubnetUtils;
 import org.apache.commons.net.util.SubnetUtils.SubnetInfo;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.util.LightWeightCache;
 import org.apache.hadoop.util.LightWeightGSet;
@@ -44,13 +45,14 @@ public class NfsExports {
   
   public static synchronized NfsExports getInstance(Configuration conf) {
     if (exports == null) {
-      String matchHosts = conf.get(Nfs3Constant.EXPORTS_ALLOWED_HOSTS_KEY,
-          Nfs3Constant.EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT);
-      int cacheSize = conf.getInt(Nfs3Constant.EXPORTS_CACHE_SIZE_KEY,
-          Nfs3Constant.EXPORTS_CACHE_SIZE_DEFAULT);
+      String matchHosts = conf.get(
+          CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY,
+          CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT);
+      int cacheSize = conf.getInt(Nfs3Constant.NFS_EXPORTS_CACHE_SIZE_KEY,
+          Nfs3Constant.NFS_EXPORTS_CACHE_SIZE_DEFAULT);
       long expirationPeriodNano = conf.getLong(
-          Nfs3Constant.EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY,
-          Nfs3Constant.EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT) * 1000 * 1000;
+          Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY,
+          Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT) * 1000 * 1000;
       exports = new NfsExports(cacheSize, expirationPeriodNano, matchHosts);
     }
     return exports;
@@ -140,7 +142,7 @@ public class NfsExports {
     accessCache = new LightWeightCache<AccessCacheEntry, AccessCacheEntry>(
         cacheSize, cacheSize, expirationPeriodNano, 0);        
     String[] matchStrings = matchHosts.split(
-        Nfs3Constant.EXPORTS_ALLOWED_HOSTS_SEPARATOR);
+        CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_SEPARATOR);
     mMatches = new ArrayList<Match>(matchStrings.length);
     for(String mStr : matchStrings) {
       if (LOG.isDebugEnabled()) {

+ 7 - 18
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/IdUserGroup.java

@@ -50,9 +50,6 @@ public class IdUserGroup {
   static final String MAC_GET_ALL_USERS_CMD = "dscl . -list /Users UniqueID";
   static final String MAC_GET_ALL_GROUPS_CMD = "dscl . -list /Groups PrimaryGroupID";
 
-  // Used for finding the configured static mapping file.
-  static final String NFS_STATIC_MAPPING_FILE_KEY = "dfs.nfs.static.mapping.file";
-  private static final String NFS_STATIC_MAPPING_FILE_DEFAULT = "/etc/nfs.map";
   private final File staticMappingFile;
 
   // Used for parsing the static mapping file.
@@ -61,11 +58,7 @@ public class IdUserGroup {
   private static final Pattern MAPPING_LINE =
       Pattern.compile("^(uid|gid)\\s+(\\d+)\\s+(\\d+)\\s*(#.*)?$");
 
-  // Do update every 15 minutes by default
-  final static long TIMEOUT_DEFAULT = 15 * 60 * 1000; // ms
-  final static long TIMEOUT_MIN = 1 * 60 * 1000; // ms
   final private long timeout;
-  final static String NFS_USERUPDATE_MILLY = "hadoop.nfs.userupdate.milly";
   
   // Maps for id to name map. Guarded by this object monitor lock
   private BiMap<Integer, String> uidNameMap = HashBiMap.create();
@@ -73,25 +66,21 @@ public class IdUserGroup {
 
   private long lastUpdateTime = 0; // Last time maps were updated
   
-  public IdUserGroup() throws IOException {
-    timeout = TIMEOUT_DEFAULT;
-    staticMappingFile = new File(NFS_STATIC_MAPPING_FILE_DEFAULT);
-    updateMaps();
-  }
-  
   public IdUserGroup(Configuration conf) throws IOException {
-    long updateTime = conf.getLong(NFS_USERUPDATE_MILLY, TIMEOUT_DEFAULT);
+    long updateTime = conf.getLong(
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY,
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT);
     // Minimal interval is 1 minute
-    if (updateTime < TIMEOUT_MIN) {
+    if (updateTime < Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_MIN) {
       LOG.info("User configured user account update time is less"
           + " than 1 minute. Use 1 minute instead.");
-      timeout = TIMEOUT_MIN;
+      timeout = Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_MIN;
     } else {
       timeout = updateTime;
     }
     
-    String staticFilePath = conf.get(NFS_STATIC_MAPPING_FILE_KEY,
-        NFS_STATIC_MAPPING_FILE_DEFAULT);
+    String staticFilePath = conf.get(Nfs3Constant.NFS_STATIC_MAPPING_FILE_KEY,
+        Nfs3Constant.NFS_STATIC_MAPPING_FILE_DEFAULT);
     staticMappingFile = new File(staticFilePath);
     
     updateMaps();

+ 2 - 5
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java

@@ -33,7 +33,6 @@ import org.apache.hadoop.util.ShutdownHookManager;
 public abstract class Nfs3Base {
   public static final Log LOG = LogFactory.getLog(Nfs3Base.class);
   private final RpcProgram rpcProgram;
-  private final int nfsPort;
   private int nfsBoundPort; // Will set after server starts
     
   public RpcProgram getRpcProgram() {
@@ -42,9 +41,7 @@ public abstract class Nfs3Base {
 
   protected Nfs3Base(RpcProgram rpcProgram, Configuration conf) {
     this.rpcProgram = rpcProgram;
-    this.nfsPort = conf.getInt(Nfs3Constant.NFS3_SERVER_PORT,
-        Nfs3Constant.NFS3_SERVER_PORT_DEFAULT);
-    LOG.info("NFS server port set to: " + nfsPort);
+    LOG.info("NFS server port set to: " + rpcProgram.getPort());
   }
 
   public void start(boolean register) {
@@ -58,7 +55,7 @@ public abstract class Nfs3Base {
   }
 
   private void startTCPServer() {
-    SimpleTcpServer tcpServer = new SimpleTcpServer(nfsPort,
+    SimpleTcpServer tcpServer = new SimpleTcpServer(rpcProgram.getPort(),
         rpcProgram, 0);
     rpcProgram.startDaemons();
     tcpServer.run();

+ 12 - 30
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java

@@ -25,10 +25,6 @@ public class Nfs3Constant {
   // The local rpcbind/portmapper port.
   public final static int SUN_RPCBIND = 111;
 
-  // The IP port number for NFS.
-  public final static String NFS3_SERVER_PORT = "nfs3.server.port";
-  public final static int NFS3_SERVER_PORT_DEFAULT = 2049;
-
   // The RPC program number for NFS.
   public final static int PROGRAM = 100003;
 
@@ -191,36 +187,22 @@ public class Nfs3Constant {
   public final static int CREATE_GUARDED = 1;
   public final static int CREATE_EXCLUSIVE = 2;
   
-  public static final String EXPORTS_ALLOWED_HOSTS_SEPARATOR = ";";
-  /** Allowed hosts for nfs exports */
-  public static final String EXPORTS_ALLOWED_HOSTS_KEY = "dfs.nfs.exports.allowed.hosts";
-  public static final String EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT = "* rw";
   /** Size for nfs exports cache */
-  public static final String EXPORTS_CACHE_SIZE_KEY = "dfs.nfs.exports.cache.size";
-  public static final int EXPORTS_CACHE_SIZE_DEFAULT = 512;
+  public static final String NFS_EXPORTS_CACHE_SIZE_KEY = "nfs.exports.cache.size";
+  public static final int NFS_EXPORTS_CACHE_SIZE_DEFAULT = 512;
   /** Expiration time for nfs exports cache entry */
-  public static final String EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY = "dfs.nfs.exports.cache.expirytime.millis";
-  public static final long EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT = 15 * 60 * 1000; // 15 min
-
-  public static final String FILE_DUMP_DIR_KEY = "dfs.nfs3.dump.dir";
-  public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
-  public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump";
-  public static final boolean ENABLE_FILE_DUMP_DEFAULT = true;
-  public static final String MAX_READ_TRANSFER_SIZE_KEY = "dfs.nfs.rtmax";
-  public static final int MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
-  public static final String MAX_WRITE_TRANSFER_SIZE_KEY = "dfs.nfs.wtmax";
-  public static final int MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
-  public static final String MAX_READDIR_TRANSFER_SIZE_KEY = "dfs.nfs.dtmax";
-  public static final int MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024;
-  public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files";
-  public static final int MAX_OPEN_FILES_DEFAULT = 256;
-  public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout";
-  public static final long OUTPUT_STREAM_TIMEOUT_DEFAULT = 10 * 60 * 1000; // 10 minutes
-  public static final long OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT = 10 * 1000; //10 seconds
+  public static final String NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY = "nfs.exports.cache.expirytime.millis";
+  public static final long NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT = 15 * 60 * 1000; // 15 min
+
+  /** Do user/group update every 15 minutes by default, minimum 1 minute */
+  public final static String NFS_USERGROUP_UPDATE_MILLIS_KEY = "nfs.usergroup.update.millis";
+  public final static long NFS_USERGROUP_UPDATE_MILLIS_DEFAULT = 15 * 60 * 1000; // ms
+  final static long NFS_USERGROUP_UPDATE_MILLIS_MIN = 1 * 60 * 1000; // ms
   
   public final static String UNKNOWN_USER = "nobody";
   public final static String UNKNOWN_GROUP = "nobody";
   
-  public final static String EXPORT_POINT = "dfs.nfs3.export.point";
-  public final static String EXPORT_POINT_DEFAULT = "/";
+  // Used for finding the configured static mapping file.
+  public static final String NFS_STATIC_MAPPING_FILE_KEY = "nfs.static.mapping.file";
+  public static final String NFS_STATIC_MAPPING_FILE_DEFAULT = "/etc/nfs.map";
 }

+ 3 - 6
hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/TestNfsExports.java

@@ -17,11 +17,8 @@
  */
 package org.apache.hadoop.nfs;
 
-import org.junit.Assert;
-
-import org.apache.hadoop.nfs.AccessPrivilege;
-import org.apache.hadoop.nfs.NfsExports;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class TestNfsExports {
@@ -32,9 +29,9 @@ public class TestNfsExports {
   private final String hostname2 = "a.b.org";
   
   private static final long ExpirationPeriod = 
-      Nfs3Constant.EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT * 1000 * 1000;
+      Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT * 1000 * 1000;
   
-  private static final int CacheSize = Nfs3Constant.EXPORTS_CACHE_SIZE_DEFAULT;
+  private static final int CacheSize = Nfs3Constant.NFS_EXPORTS_CACHE_SIZE_DEFAULT;
   private static final long NanosPerMillis = 1000000;
 
   @Test

+ 9 - 7
hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/nfs3/TestIdUserGroup.java

@@ -199,17 +199,19 @@ public class TestIdUserGroup {
 
   @Test
   public void testUserUpdateSetting() throws IOException {
-    IdUserGroup iug = new IdUserGroup();
-    assertEquals(iug.getTimeout(), IdUserGroup.TIMEOUT_DEFAULT);
+    IdUserGroup iug = new IdUserGroup(new Configuration());
+    assertEquals(iug.getTimeout(),
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT);
 
     Configuration conf = new Configuration();
-    conf.setLong(IdUserGroup.NFS_USERUPDATE_MILLY, 0);
+    conf.setLong(Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY, 0);
     iug = new IdUserGroup(conf);
-    assertEquals(iug.getTimeout(), IdUserGroup.TIMEOUT_MIN);
+    assertEquals(iug.getTimeout(), Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_MIN);
 
-    conf.setLong(IdUserGroup.NFS_USERUPDATE_MILLY,
-        IdUserGroup.TIMEOUT_DEFAULT * 2);
+    conf.setLong(Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY,
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT * 2);
     iug = new IdUserGroup(conf);
-    assertEquals(iug.getTimeout(), IdUserGroup.TIMEOUT_DEFAULT * 2);
+    assertEquals(iug.getTimeout(),
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT * 2);
   }
 }

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java

@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.lib.server.BaseService;
 import org.apache.hadoop.lib.server.ServiceException;
 import org.apache.hadoop.lib.service.FileSystemAccess;
@@ -395,6 +396,10 @@ public class FileSystemAccessService extends BaseService implements FileSystemAc
     Configuration conf = new Configuration(true);
     ConfigurationUtils.copy(serviceHadoopConf, conf);
     conf.setBoolean(FILE_SYSTEM_SERVICE_CREATED, true);
+
+    // Force-clear server-side umask to make HttpFS match WebHDFS behavior
+    conf.set(FsPermission.UMASK_LABEL, "000");
+
     return conf;
   }
 

+ 99 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java

@@ -231,6 +231,105 @@ public class TestHttpFSServer extends HFSTestCase {
     reader.close();
   }
 
+  /**
+   * Talks to the http interface to create a file.
+   *
+   * @param filename The file to create
+   * @param perms The permission field, if any (may be null)
+   * @throws Exception
+   */
+  private void createWithHttp ( String filename, String perms )
+          throws Exception {
+    String user = HadoopUsersConfTestHelper.getHadoopUsers()[0];
+    String pathOps;
+    if ( perms == null ) {
+      pathOps = MessageFormat.format(
+              "/webhdfs/v1/{0}?user.name={1}&op=CREATE",
+              filename, user);
+    } else {
+      pathOps = MessageFormat.format(
+              "/webhdfs/v1/{0}?user.name={1}&permission={2}&op=CREATE",
+              filename, user, perms);
+    }
+    URL url = new URL(TestJettyHelper.getJettyURL(), pathOps);
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    conn.addRequestProperty("Content-Type", "application/octet-stream");
+    conn.setRequestMethod("PUT");
+    conn.connect();
+    Assert.assertEquals(HttpURLConnection.HTTP_CREATED, conn.getResponseCode());
+  }
+
+  /**
+   * Talks to the http interface to get the json output of the GETFILESTATUS
+   * command on the given file. The request must succeed with HTTP 200, and
+   * only the first line of the response body is returned.
+   *
+   * @param filename The file to query.
+   * @return A string containing the first line of the response body (the
+   *         JSON output describing the file).
+   * @throws Exception
+   */
+  private String getFileStatus ( String filename ) throws Exception {
+    String user = HadoopUsersConfTestHelper.getHadoopUsers()[0];
+    String pathOps = MessageFormat.format(
+            "/webhdfs/v1/{0}?user.name={1}&op=GETFILESTATUS",
+            filename, user);
+    URL url = new URL(TestJettyHelper.getJettyURL(), pathOps);
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    conn.connect();
+    Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
+
+    BufferedReader reader =
+            new BufferedReader(new InputStreamReader(conn.getInputStream()));
+    try {
+      return reader.readLine();
+    } finally {
+      // Always release the response stream, even if readLine() throws;
+      // previously the reader was never closed.
+      reader.close();
+    }
+  }
+
+  /**
+   * Given the JSON output from the GETFILESTATUS call, return the
+   * 'permission' value.
+   *
+   * @param statusJson JSON from GETFILESTATUS
+   * @return The value of 'permission' in statusJson
+   * @throws Exception
+   */
+  private String getPerms ( String statusJson ) throws Exception {
+    JSONParser parser = new JSONParser();
+    JSONObject jsonObject = (JSONObject) parser.parse(statusJson);
+    JSONObject details = (JSONObject) jsonObject.get("FileStatus");
+    return (String) details.get("permission");
+  }
+
+  /**
+   * Validate that files are created with 755 permissions when no
+   * 'permission' parameter is specified, and when 'permission'
+   * is specified, that value is honored.
+   */
+  @Test
+  @TestDir
+  @TestJetty
+  @TestHdfs
+  public void testPerms() throws Exception {
+    createHttpFSServer(false);
+
+    FileSystem fs = FileSystem.get(TestHdfsHelper.getHdfsConf());
+    fs.mkdirs(new Path("/perm"));
+
+    // assertEquals (rather than assertTrue on equals()) is used so that a
+    // failure reports both the expected and the actual permission string.
+    createWithHttp("/perm/none", null);
+    String statusJson = getFileStatus("/perm/none");
+    Assert.assertEquals("755", getPerms(statusJson));
+
+    createWithHttp("/perm/p-777", "777");
+    statusJson = getFileStatus("/perm/p-777");
+    Assert.assertEquals("777", getPerms(statusJson));
+
+    createWithHttp("/perm/p-654", "654");
+    statusJson = getFileStatus("/perm/p-654");
+    Assert.assertEquals("654", getPerms(statusJson));
+
+    createWithHttp("/perm/p-321", "321");
+    statusJson = getFileStatus("/perm/p-321");
+    Assert.assertEquals("321", getPerms(statusJson));
+  }
+
   @Test
   @TestDir
   @TestJetty

+ 57 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java

@@ -0,0 +1,57 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.hdfs.nfs.conf;
+
+/**
+ * Configuration key names and their default values for the HDFS NFS code.
+ * Each *_KEY constant is the property name; the matching *_DEFAULT constant
+ * is the fallback value passed alongside it when the property is read.
+ */
+public class NfsConfigKeys {
+
+  // ---- Ports -------------------------------------------------------------
+  // The IP port number for NFS and mountd.
+  public static final String DFS_NFS_SERVER_PORT_KEY = "nfs.server.port";
+  public static final int DFS_NFS_SERVER_PORT_DEFAULT = 2049;
+  public static final String DFS_NFS_MOUNTD_PORT_KEY = "nfs.mountd.port";
+  public static final int DFS_NFS_MOUNTD_PORT_DEFAULT = 4242;
+
+  // ---- File dump ---------------------------------------------------------
+  public static final String DFS_NFS_FILE_DUMP_KEY = "nfs.file.dump";
+  public static final boolean DFS_NFS_FILE_DUMP_DEFAULT = true;
+  public static final String DFS_NFS_FILE_DUMP_DIR_KEY = "nfs.file.dump.dir";
+  public static final String DFS_NFS_FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
+
+  // ---- Maximum transfer sizes --------------------------------------------
+  public static final String DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY =
+      "nfs.rtmax";
+  public static final int DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT =
+      1024 * 1024;
+  public static final String DFS_NFS_MAX_WRITE_TRANSFER_SIZE_KEY =
+      "nfs.wtmax";
+  public static final int DFS_NFS_MAX_WRITE_TRANSFER_SIZE_DEFAULT =
+      1024 * 1024;
+  public static final String DFS_NFS_MAX_READDIR_TRANSFER_SIZE_KEY =
+      "nfs.dtmax";
+  public static final int DFS_NFS_MAX_READDIR_TRANSFER_SIZE_DEFAULT =
+      64 * 1024;
+
+  // ---- Open files and stream timeout -------------------------------------
+  public static final String DFS_NFS_MAX_OPEN_FILES_KEY =
+      "nfs.max.open.files";
+  public static final int DFS_NFS_MAX_OPEN_FILES_DEFAULT = 256;
+
+  public static final String DFS_NFS_STREAM_TIMEOUT_KEY =
+      "nfs.stream.timeout";
+  // 10 minutes
+  public static final long DFS_NFS_STREAM_TIMEOUT_DEFAULT = 10 * 60 * 1000;
+  // 10 seconds
+  public static final long DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT = 10 * 1000;
+
+  // ---- Export point ------------------------------------------------------
+  public static final String DFS_NFS_EXPORT_POINT_KEY = "nfs.export.point";
+  public static final String DFS_NFS_EXPORT_POINT_DEFAULT = "/";
+
+  // ---- Security and registration -----------------------------------------
+  public static final String DFS_NFS_KEYTAB_FILE_KEY = "nfs.keytab.file";
+  public static final String DFS_NFS_KERBEROS_PRINCIPAL_KEY =
+      "nfs.kerberos.principal";
+  public static final String DFS_NFS_REGISTRATION_PORT_KEY =
+      "nfs.registration.port";
+  // Currently unassigned.
+  public static final int DFS_NFS_REGISTRATION_PORT_DEFAULT = 40;
+  public static final String DFS_NFS_ALLOW_INSECURE_PORTS_KEY =
+      "nfs.allow.insecure.ports";
+  public static final boolean DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT = true;
+}

+ 54 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfiguration.java

@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.nfs.conf;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
+
+/**
+ * An {@link HdfsConfiguration} that, when the class is initialized,
+ * registers the old NFS property names as deprecated aliases of the
+ * current keys.
+ */
+public class NfsConfiguration extends HdfsConfiguration {
+  static {
+    addDeprecatedKeys();
+  }
+
+  /**
+   * Builds the table of (deprecated name, current key) pairs and hands it
+   * to {@link Configuration#addDeprecations}.
+   */
+  private static void addDeprecatedKeys() {
+    final DeprecationDelta[] deltas = {
+        // Server ports
+        new DeprecationDelta("nfs3.server.port",
+            NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY),
+        new DeprecationDelta("nfs3.mountd.port",
+            NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY),
+        // Keys defined in Nfs3Constant
+        new DeprecationDelta("dfs.nfs.exports.cache.expirytime.millis",
+            Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY),
+        new DeprecationDelta("hadoop.nfs.userupdate.milly",
+            Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY),
+        // File dump, open files, stream timeout and export point
+        new DeprecationDelta("dfs.nfs3.enableDump",
+            NfsConfigKeys.DFS_NFS_FILE_DUMP_KEY),
+        new DeprecationDelta("dfs.nfs3.dump.dir",
+            NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY),
+        new DeprecationDelta("dfs.nfs3.max.open.files",
+            NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY),
+        new DeprecationDelta("dfs.nfs3.stream.timeout",
+            NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY),
+        new DeprecationDelta("dfs.nfs3.export.point",
+            NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY)
+    };
+    Configuration.addDeprecations(deltas);
+  }
+}

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/Mountd.java

@@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.nfs.mount;
 import java.io.IOException;
 import java.net.DatagramSocket;
 
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.mount.MountdBase;
 
 /**
@@ -32,13 +32,13 @@ import org.apache.hadoop.mount.MountdBase;
  */
 public class Mountd extends MountdBase {
 
-  public Mountd(Configuration config, DatagramSocket registrationSocket,
+  public Mountd(NfsConfiguration config, DatagramSocket registrationSocket,
       boolean allowInsecurePorts) throws IOException {
     super(new RpcProgramMountd(config, registrationSocket, allowInsecurePorts));
   }
   
   public static void main(String[] args) throws IOException {
-    Configuration config = new Configuration();
+    NfsConfiguration config = new NfsConfiguration();
     Mountd mountd = new Mountd(config, null, true);
     mountd.start(true);
   }

+ 13 - 15
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java

@@ -16,9 +16,6 @@
  * limitations under the License.
  */
 package org.apache.hadoop.hdfs.nfs.mount;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KEYTAB_FILE_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY;
-
 import java.io.IOException;
 import java.net.DatagramSocket;
 import java.net.InetAddress;
@@ -29,8 +26,9 @@ import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.mount.MountEntry;
@@ -39,7 +37,6 @@ import org.apache.hadoop.mount.MountResponse;
 import org.apache.hadoop.nfs.AccessPrivilege;
 import org.apache.hadoop.nfs.NfsExports;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.nfs.nfs3.Nfs3Status;
 import org.apache.hadoop.oncrpc.RpcAcceptedReply;
 import org.apache.hadoop.oncrpc.RpcCall;
@@ -66,9 +63,7 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface {
   public static final int VERSION_1 = 1;
   public static final int VERSION_2 = 2;
   public static final int VERSION_3 = 3;
-  public static final int PORT = 4242;
 
-  // Need DFSClient for branch-1 to get ExtendedHdfsFileStatus
   private final DFSClient dfsClient;
   
   /** Synchronized list */
@@ -79,19 +74,22 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface {
   
   private final NfsExports hostsMatcher;
 
-  public RpcProgramMountd(Configuration config, DatagramSocket registrationSocket,
-      boolean allowInsecurePorts) throws IOException {
+  public RpcProgramMountd(NfsConfiguration config,
+      DatagramSocket registrationSocket, boolean allowInsecurePorts)
+      throws IOException {
     // Note that RPC cache is not enabled
-    super("mountd", "localhost", config.getInt("nfs3.mountd.port", PORT),
-        PROGRAM, VERSION_1, VERSION_3, registrationSocket, allowInsecurePorts);
+    super("mountd", "localhost", config.getInt(
+        NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_MOUNTD_PORT_DEFAULT), PROGRAM, VERSION_1,
+        VERSION_3, registrationSocket, allowInsecurePorts);
     exports = new ArrayList<String>();
-    exports.add(config.get(Nfs3Constant.EXPORT_POINT,
-        Nfs3Constant.EXPORT_POINT_DEFAULT));
+    exports.add(config.get(NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY,
+        NfsConfigKeys.DFS_NFS_EXPORT_POINT_DEFAULT));
     this.hostsMatcher = NfsExports.getInstance(config);
     this.mounts = Collections.synchronizedList(new ArrayList<MountEntry>());
     UserGroupInformation.setConfiguration(config);
-    SecurityUtil.login(config, DFS_NFS_KEYTAB_FILE_KEY,
-            DFS_NFS_KERBEROS_PRINCIPAL_KEY);
+    SecurityUtil.login(config, NfsConfigKeys.DFS_NFS_KEYTAB_FILE_KEY,
+        NfsConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY);
     this.dfsClient = new DFSClient(NameNode.getAddress(config), config);
   }
   

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java

@@ -30,10 +30,10 @@ import java.util.concurrent.TimeUnit;
 
 import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSInputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.MultipleIOException;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -72,7 +72,7 @@ class DFSClientCache {
   final static int DEFAULT_DFS_INPUTSTREAM_CACHE_SIZE = 1024;
   final static int DEFAULT_DFS_INPUTSTREAM_CACHE_TTL = 10 * 60;
 
-  private final Configuration config;
+  private final NfsConfiguration config;
 
   private static class DFSInputStreamCaheKey {
     final String userId;
@@ -99,11 +99,11 @@ class DFSClientCache {
     }
   }
 
-  DFSClientCache(Configuration config) {
+  DFSClientCache(NfsConfiguration config) {
     this(config, DEFAULT_DFS_CLIENT_CACHE_SIZE);
   }
   
-  DFSClientCache(Configuration config, int clientCache) {
+  DFSClientCache(NfsConfiguration config, int clientCache) {
     this.config = config;
     this.clientCache = CacheBuilder.newBuilder()
         .maximumSize(clientCache)

+ 8 - 13
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java

@@ -20,8 +20,8 @@ package org.apache.hadoop.hdfs.nfs.nfs3;
 import java.io.IOException;
 import java.net.DatagramSocket;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.mount.Mountd;
 import org.apache.hadoop.nfs.nfs3.Nfs3Base;
 import org.apache.hadoop.util.StringUtils;
@@ -36,16 +36,11 @@ import com.google.common.annotations.VisibleForTesting;
 public class Nfs3 extends Nfs3Base {
   private Mountd mountd;
   
-  static {
-    Configuration.addDefaultResource("hdfs-default.xml");
-    Configuration.addDefaultResource("hdfs-site.xml");
-  }
-  
-  public Nfs3(Configuration conf) throws IOException {
+  public Nfs3(NfsConfiguration conf) throws IOException {
     this(conf, null, true);
   }
   
-  public Nfs3(Configuration conf, DatagramSocket registrationSocket,
+  public Nfs3(NfsConfiguration conf, DatagramSocket registrationSocket,
       boolean allowInsecurePorts) throws IOException {
     super(new RpcProgramNfs3(conf, registrationSocket, allowInsecurePorts), conf);
     mountd = new Mountd(conf, registrationSocket, allowInsecurePorts);
@@ -64,11 +59,11 @@ public class Nfs3 extends Nfs3Base {
   static void startService(String[] args,
       DatagramSocket registrationSocket) throws IOException {
     StringUtils.startupShutdownMessage(Nfs3.class, args, LOG);
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     boolean allowInsecurePorts = conf.getBoolean(
-        DFSConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_KEY,
-        DFSConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT);
-    final Nfs3 nfsServer = new Nfs3(new Configuration(), registrationSocket,
+        NfsConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_KEY,
+        NfsConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT);
+    final Nfs3 nfsServer = new Nfs3(conf, registrationSocket,
         allowInsecurePorts);
     nfsServer.startServiceInternal(true);
   }

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
 import org.apache.hadoop.hdfs.nfs.nfs3.WriteCtx.DataState;
 import org.apache.hadoop.io.BytesWritable.Comparator;
 import org.apache.hadoop.io.IOUtils;
@@ -823,7 +824,7 @@ class OpenFileCtx {
    */
   public synchronized boolean streamCleanup(long fileId, long streamTimeout) {
     Preconditions
-        .checkState(streamTimeout >= Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+        .checkState(streamTimeout >= NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     if (!activeState) {
       return true;
     }

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java

@@ -24,9 +24,9 @@ import java.util.concurrent.ConcurrentMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.Time;
 
@@ -48,9 +48,9 @@ class OpenFileCtxCache {
   private final long streamTimeout;
   private final StreamMonitor streamMonitor;
 
-  OpenFileCtxCache(Configuration config, long streamTimeout) {
-    maxStreams = config.getInt(Nfs3Constant.MAX_OPEN_FILES,
-        Nfs3Constant.MAX_OPEN_FILES_DEFAULT);
+  OpenFileCtxCache(NfsConfiguration config, long streamTimeout) {
+    maxStreams = config.getInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY,
+        NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_DEFAULT);
     LOG.info("Maximum open streams is " + maxStreams);
     this.streamTimeout = streamTimeout;
     streamMonitor = new StreamMonitor();
@@ -102,7 +102,7 @@ class OpenFileCtxCache {
     } else {
       long idleTime = Time.monotonicNow()
           - idlest.getValue().getLastAccessTime();
-      if (idleTime < Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT) {
+      if (idleTime < NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("idlest stream's idle time:" + idleTime);
         }

+ 6 - 7
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java

@@ -21,9 +21,8 @@ import java.net.InetSocketAddress;
 
 import org.apache.commons.daemon.Daemon;
 import org.apache.commons.daemon.DaemonContext;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 
 /**
  * This class is used to allow the initial registration of the NFS gateway with
@@ -42,12 +41,12 @@ public class PrivilegedNfsGatewayStarter implements Daemon {
   @Override
   public void init(DaemonContext context) throws Exception {
     System.err.println("Initializing privileged NFS client socket...");
-    Configuration conf = new HdfsConfiguration();
-    int clientPort = conf.getInt(DFSConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY,
-        DFSConfigKeys.DFS_NFS_REGISTRATION_PORT_DEFAULT);
+    NfsConfiguration conf = new NfsConfiguration();
+    int clientPort = conf.getInt(NfsConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_REGISTRATION_PORT_DEFAULT);
     if (clientPort < 1 || clientPort > 1023) {
       throw new RuntimeException("Must start privileged NFS server with '" +
-          DFSConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY + "' configured to a " +
+          NfsConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY + "' configured to a " +
           "privileged port.");
     }
     registrationSocket = new DatagramSocket(

+ 28 - 25
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java

@@ -28,7 +28,6 @@ import java.util.EnumSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
@@ -41,6 +40,8 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -131,9 +132,6 @@ import org.jboss.netty.channel.ChannelHandlerContext;
 
 import com.google.common.annotations.VisibleForTesting;
 
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KEYTAB_FILE_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY;
-
 /**
  * RPC program corresponding to nfs daemon. See {@link Nfs3}.
  */
@@ -144,7 +142,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
   
   static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class);
 
-  private final Configuration config = new Configuration();
+  private final NfsConfiguration config;
   private final WriteManager writeManager;
   private final IdUserGroup iug;
   private final DFSClientCache clientCache;
@@ -159,15 +157,17 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
   
   private final RpcCallCache rpcCallCache;
 
-  public RpcProgramNfs3(Configuration config, DatagramSocket registrationSocket,
+  public RpcProgramNfs3(NfsConfiguration config, DatagramSocket registrationSocket,
       boolean allowInsecurePorts) throws IOException {
-    super("NFS3", "localhost", config.getInt(Nfs3Constant.NFS3_SERVER_PORT,
-        Nfs3Constant.NFS3_SERVER_PORT_DEFAULT), Nfs3Constant.PROGRAM,
+    super("NFS3", "localhost", config.getInt(
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_DEFAULT), Nfs3Constant.PROGRAM,
         Nfs3Constant.VERSION, Nfs3Constant.VERSION, registrationSocket,
         allowInsecurePorts);
    
+    this.config = config;
     config.set(FsPermission.UMASK_LABEL, "000");
-    iug = new IdUserGroup();
+    iug = new IdUserGroup(config);
     
     exports = NfsExports.getInstance(config);
     writeManager = new WriteManager(iug, config);
@@ -180,13 +180,13 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
         CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY,
         CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT);
     
-    writeDumpDir = config.get(Nfs3Constant.FILE_DUMP_DIR_KEY,
-        Nfs3Constant.FILE_DUMP_DIR_DEFAULT);
-    boolean enableDump = config.getBoolean(Nfs3Constant.ENABLE_FILE_DUMP_KEY,
-        Nfs3Constant.ENABLE_FILE_DUMP_DEFAULT);
+    writeDumpDir = config.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY,
+        NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_DEFAULT);
+    boolean enableDump = config.getBoolean(NfsConfigKeys.DFS_NFS_FILE_DUMP_KEY,
+        NfsConfigKeys.DFS_NFS_FILE_DUMP_DEFAULT);
     UserGroupInformation.setConfiguration(config);
-    SecurityUtil.login(config, DFS_NFS_KEYTAB_FILE_KEY,
-            DFS_NFS_KERBEROS_PRINCIPAL_KEY);
+    SecurityUtil.login(config, NfsConfigKeys.DFS_NFS_KEYTAB_FILE_KEY,
+        NfsConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY);
 
     if (!enableDump) {
       writeDumpDir = null;
@@ -567,8 +567,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
             + handle.getFileId());
         return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
       }
-      int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+      int rtmax = config.getInt(NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT);
       if (rtmax < target.getBytes().length) {
         LOG.error("Link size: " + target.getBytes().length
             + " is larger than max transfer size: " + rtmax);
@@ -665,8 +665,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     }
 
     try {
-      int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+      int rtmax = config.getInt(NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT);
       int buffSize = Math.min(rtmax, count);
       byte[] readbuffer = new byte[buffSize];
 
@@ -1740,12 +1740,15 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     }
 
     try {
-      int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
-      int wtmax = config.getInt(Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_DEFAULT);
-      int dtperf = config.getInt(Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_DEFAULT);
+      int rtmax = config.getInt(
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT);
+      int wtmax = config.getInt(
+          NfsConfigKeys.DFS_NFS_MAX_WRITE_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_WRITE_TRANSFER_SIZE_DEFAULT);
+      int dtperf = config.getInt(
+          NfsConfigKeys.DFS_NFS_MAX_READDIR_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READDIR_TRANSFER_SIZE_DEFAULT);
 
       Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient,
           Nfs3Utils.getFileIdPath(handle), iug);

+ 13 - 12
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java

@@ -21,10 +21,11 @@ import java.io.IOException;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.COMMIT_STATUS;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.ipc.RemoteException;
@@ -50,7 +51,7 @@ import com.google.common.annotations.VisibleForTesting;
 public class WriteManager {
   public static final Log LOG = LogFactory.getLog(WriteManager.class);
 
-  private final Configuration config;
+  private final NfsConfiguration config;
   private final IdUserGroup iug;
  
   private AsyncDataService asyncDataService;
@@ -78,19 +79,19 @@ public class WriteManager {
     return fileContextCache.put(h, ctx);
   }
   
-  WriteManager(IdUserGroup iug, final Configuration config) {
+  WriteManager(IdUserGroup iug, final NfsConfiguration config) {
     this.iug = iug;
     this.config = config;
-    streamTimeout = config.getLong(Nfs3Constant.OUTPUT_STREAM_TIMEOUT,
-        Nfs3Constant.OUTPUT_STREAM_TIMEOUT_DEFAULT);
+    streamTimeout = config.getLong(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY,
+        NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_DEFAULT);
     LOG.info("Stream timeout is " + streamTimeout + "ms.");
-    if (streamTimeout < Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT) {
+    if (streamTimeout < NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT) {
       LOG.info("Reset stream timeout to minimum value "
-          + Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT + "ms.");
-      streamTimeout = Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT;
+          + NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT + "ms.");
+      streamTimeout = NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT;
     }
-    maxStreams = config.getInt(Nfs3Constant.MAX_OPEN_FILES,
-        Nfs3Constant.MAX_OPEN_FILES_DEFAULT);
+    maxStreams = config.getInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY,
+        NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_DEFAULT);
     LOG.info("Maximum open streams is "+ maxStreams);
     this.fileContextCache = new OpenFileCtxCache(config, streamTimeout);
   }
@@ -171,8 +172,8 @@ public class WriteManager {
       }
 
       // Add open stream
-      String writeDumpDir = config.get(Nfs3Constant.FILE_DUMP_DIR_KEY,
-          Nfs3Constant.FILE_DUMP_DIR_DEFAULT);
+      String writeDumpDir = config.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY,
+          NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_DEFAULT);
       openFileCtx = new OpenFileCtx(fos, latestAttr, writeDumpDir + "/"
           + fileHandle.getFileId(), dfsClient, iug);
 

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java

@@ -23,8 +23,8 @@ import java.net.InetAddress;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.mount.RpcProgramMountd;
 import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3;
 import org.apache.hadoop.hdfs.nfs.nfs3.RpcProgramNfs3;
@@ -38,7 +38,7 @@ public class TestMountd {
   @Test
   public void testStart() throws IOException {
     // Start minicluster
-    Configuration config = new Configuration();
+    NfsConfiguration config = new NfsConfiguration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(config).numDataNodes(1)
         .build();
     cluster.waitActive();

+ 5 - 4
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java

@@ -23,7 +23,8 @@ import java.util.Arrays;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3Utils;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
@@ -155,10 +156,10 @@ public class TestOutOfOrderWrite {
     Arrays.fill(data3, (byte) 9);
 
     // NFS3 Create request
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     WriteClient client = new WriteClient("localhost", conf.getInt(
-        Nfs3Constant.NFS3_SERVER_PORT, Nfs3Constant.NFS3_SERVER_PORT_DEFAULT),
-        create(), false);
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_DEFAULT), create(), false);
     client.run();
 
     while (handle == null) {

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java

@@ -24,11 +24,11 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.util.List;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3;
 import org.apache.hadoop.hdfs.nfs.nfs3.RpcProgramNfs3;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -53,7 +53,7 @@ import org.mockito.Mockito;
  */
 public class TestReaddir {
 
-  static Configuration config = new Configuration();
+  static NfsConfiguration config = new NfsConfiguration();
   static MiniDFSCluster cluster = null;
   static DistributedFileSystem hdfs;
   static NameNode nn;

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java

@@ -18,24 +18,24 @@
 package org.apache.hadoop.hdfs.nfs.nfs3;
 
 import static org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertThat;
-import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.junit.Test;
 
 public class TestDFSClientCache {
   @Test
   public void testEviction() throws IOException {
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
 
     // Only one entry will be in the cache
@@ -59,7 +59,7 @@ public class TestDFSClientCache {
     String currentUser = "test-user";
 
 
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     UserGroupInformation currentUserUgi
             = UserGroupInformation.createRemoteUser(currentUser);
     currentUserUgi.setAuthenticationMethod(KERBEROS);
@@ -83,7 +83,7 @@ public class TestDFSClientCache {
 
     UserGroupInformation currentUserUgi = UserGroupInformation
             .createUserForTesting(currentUser, new String[0]);
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
     DFSClientCache cache = new DFSClientCache(conf);
     UserGroupInformation ugiResult

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestExportsTable.java

@@ -21,22 +21,22 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.mount.Mountd;
 import org.apache.hadoop.hdfs.nfs.mount.RpcProgramMountd;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.junit.Test;
 
 public class TestExportsTable {
  
   @Test
   public void testExportPoint() throws IOException {
-    Configuration config = new Configuration();
+    NfsConfiguration config = new NfsConfiguration();
     MiniDFSCluster cluster = null;
 
     String exportPoint = "/myexport1";
-    config.setStrings(Nfs3Constant.EXPORT_POINT, exportPoint);
+    config.setStrings(NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY, exportPoint);
     // Use ephemeral port in case tests are running in parallel
     config.setInt("nfs3.mountd.port", 0);
     config.setInt("nfs3.server.port", 0);

+ 20 - 20
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestOpenFileCtxCache.java

@@ -22,13 +22,13 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.CommitCtx;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
 import org.apache.hadoop.nfs.nfs3.IdUserGroup;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.nfs.nfs3.Nfs3FileAttributes;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -38,10 +38,10 @@ public class TestOpenFileCtxCache {
 
   @Test
   public void testEviction() throws IOException, InterruptedException {
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
 
     // Only two entries will be in the cache
-    conf.setInt(Nfs3Constant.MAX_OPEN_FILES, 2);
+    conf.setInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY, 2);
 
     DFSClient dfsClient = Mockito.mock(DFSClient.class);
     Nfs3FileAttributes attr = new Nfs3FileAttributes();
@@ -49,15 +49,15 @@ public class TestOpenFileCtxCache {
     Mockito.when(fos.getPos()).thenReturn((long) 0);
 
     OpenFileCtx context1 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context2 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context3 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context4 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context5 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
 
     OpenFileCtxCache cache = new OpenFileCtxCache(conf, 10 * 60 * 100);
 
@@ -71,7 +71,7 @@ public class TestOpenFileCtxCache {
     assertTrue(cache.size() == 2);
 
     // Wait for the oldest stream to be evict-able, insert again
-    Thread.sleep(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+    Thread.sleep(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     assertTrue(cache.size() == 2);
 
     ret = cache.put(new FileHandle(3), context3);
@@ -90,17 +90,17 @@ public class TestOpenFileCtxCache {
         new WriteCtx(null, 0, 0, 0, null, null, null, 0, false, null));
     context4.getPendingCommitsForTest().put(new Long(100),
         new CommitCtx(0, null, 0, attr));
-    Thread.sleep(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+    Thread.sleep(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     ret = cache.put(new FileHandle(5), context5);
     assertFalse(ret);
   }
 
   @Test
   public void testScan() throws IOException, InterruptedException {
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
 
     // Only two entries will be in the cache
-    conf.setInt(Nfs3Constant.MAX_OPEN_FILES, 2);
+    conf.setInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY, 2);
 
     DFSClient dfsClient = Mockito.mock(DFSClient.class);
     Nfs3FileAttributes attr = new Nfs3FileAttributes();
@@ -108,13 +108,13 @@ public class TestOpenFileCtxCache {
     Mockito.when(fos.getPos()).thenReturn((long) 0);
 
     OpenFileCtx context1 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context2 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context3 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context4 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
 
     OpenFileCtxCache cache = new OpenFileCtxCache(conf, 10 * 60 * 100);
 
@@ -123,8 +123,8 @@ public class TestOpenFileCtxCache {
     assertTrue(ret);
     ret = cache.put(new FileHandle(2), context2);
     assertTrue(ret);
-    Thread.sleep(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT + 1);
-    cache.scan(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+    Thread.sleep(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT + 1);
+    cache.scan(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     assertTrue(cache.size() == 0);
 
     // Test cleaning inactive entry
@@ -133,7 +133,7 @@ public class TestOpenFileCtxCache {
     ret = cache.put(new FileHandle(4), context4);
     assertTrue(ret);
     context3.setActiveStatusForTest(false);
-    cache.scan(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_DEFAULT);
+    cache.scan(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_DEFAULT);
     assertTrue(cache.size() == 1);
     assertTrue(cache.get(new FileHandle(3)) == null);
     assertTrue(cache.get(new FileHandle(4)) != null);

+ 42 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java

@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hdfs.nfs.nfs3;
 
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.junit.Assert;
 import org.junit.Test;
@@ -63,4 +68,41 @@ public class TestRpcProgramNfs3 {
       }
     }
   }
+
+  @Test
+  public void testDeprecatedKeys() {
+    NfsConfiguration conf = new NfsConfiguration();
+    conf.setInt("nfs3.server.port", 998);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY, 0) == 998);
+
+    conf.setInt("nfs3.mountd.port", 999);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY, 0) == 999);
+
+    conf.set("dfs.nfs.exports.allowed.hosts", "host1");
+    assertTrue(conf.get(CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY)
+        .equals("host1"));
+
+    conf.setInt("dfs.nfs.exports.cache.expirytime.millis", 1000);
+    assertTrue(conf.getInt(
+        Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY, 0) == 1000);
+
+    conf.setInt("hadoop.nfs.userupdate.milly", 10);
+    assertTrue(conf.getInt(Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY, 0) == 10);
+
+    conf.set("dfs.nfs3.dump.dir", "/nfs/tmp");
+    assertTrue(conf.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY).equals(
+        "/nfs/tmp"));
+
+    conf.setBoolean("dfs.nfs3.enableDump", false);
+    assertTrue(conf.getBoolean(NfsConfigKeys.DFS_NFS_FILE_DUMP_KEY, true) == false);
+
+    conf.setInt("dfs.nfs3.max.open.files", 500);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY, 0) == 500);
+
+    conf.setInt("dfs.nfs3.stream.timeout", 6000);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY, 0) == 6000);
+
+    conf.set("dfs.nfs3.export.point", "/dir1");
+    assertTrue(conf.get(NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY).equals("/dir1"));
+  }
 }

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java

@@ -27,11 +27,10 @@ import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.concurrent.ConcurrentNavigableMap;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSClient;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.COMMIT_STATUS;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.CommitCtx;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -139,7 +138,7 @@ public class TestWrites {
     Mockito.when(fos.getPos()).thenReturn((long) 0);
 
     OpenFileCtx ctx = new OpenFileCtx(fos, attr, "/dumpFilePath", dfsClient,
-        new IdUserGroup());
+        new IdUserGroup(new NfsConfiguration()));
 
     COMMIT_STATUS ret;
 
@@ -201,13 +200,14 @@ public class TestWrites {
     Nfs3FileAttributes attr = new Nfs3FileAttributes();
     HdfsDataOutputStream fos = Mockito.mock(HdfsDataOutputStream.class);
     Mockito.when(fos.getPos()).thenReturn((long) 0);
+    NfsConfiguration config = new NfsConfiguration();
 
     OpenFileCtx ctx = new OpenFileCtx(fos, attr, "/dumpFilePath", dfsClient,
-        new IdUserGroup());
+        new IdUserGroup(config));
 
     FileHandle h = new FileHandle(1); // fake handle for "/dumpFilePath"
     COMMIT_STATUS ret;
-    WriteManager wm = new WriteManager(new IdUserGroup(), new Configuration());
+    WriteManager wm = new WriteManager(new IdUserGroup(config), config);
     assertTrue(wm.addOpenFileStream(h, ctx));
     
     // Test inactive open file context
@@ -280,7 +280,7 @@ public class TestWrites {
 
   @Test
   public void testWriteStableHow() throws IOException, InterruptedException {
-    HdfsConfiguration config = new HdfsConfiguration();
+    NfsConfiguration config = new NfsConfiguration();
     DFSClient client = null;
     MiniDFSCluster cluster = null;
     RpcProgramNfs3 nfsd;

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/resources/core-site.xml

@@ -18,12 +18,12 @@
 
 <configuration>
 <property>
-  <name>nfs3.server.port</name>
+  <name>nfs.server.port</name>
   <value>2079</value>
 </property>
 
 <property>
-  <name>nfs3.mountd.port</name>
+  <name>nfs.mountd.port</name>
   <value>4272</value>
 </property>
 </configuration>

+ 11 - 1
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -452,7 +452,14 @@ Release 2.5.0 - UNRELEASED
     HDFS-6356. Fix typo in DatanodeLayoutVersion. (Tulasi G via wang)
 
     HDFS-6447. balancer should timestamp the completion message.
-    (Juan Yu via wang).
+    (Juan Yu via wang)
+
+    HDFS-6463. Clarify behavior of AclStorage#createFsPermissionForExtendedAcl
+    in comments. (cnauroth)
+
+    HDFS-6472. fix typo in webapps/hdfs/explorer.js. (Juan Yu via wang)
+
+    HDFS-6056. Clean up NFS config settings (brandonli)
 
   OPTIMIZATIONS
 
@@ -617,6 +624,9 @@ Release 2.5.0 - UNRELEASED
 
     HDFS-6462. NFS: fsstat request fails with the secure hdfs (brandonli)
 
+    HDFS-6404. HttpFS should use a 000 umask for mkdir and create 
+    operations. (yoderme via tucu)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 0 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -639,12 +639,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE =
       "dfs.client.hedged.read.threadpool.size";
   public static final int     DEFAULT_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE = 0;
-  public static final String  DFS_NFS_KEYTAB_FILE_KEY = "dfs.nfs.keytab.file";
-  public static final String  DFS_NFS_KERBEROS_PRINCIPAL_KEY = "dfs.nfs.kerberos.principal";
-  public static final String  DFS_NFS_REGISTRATION_PORT_KEY = "dfs.nfs.registration.port";
-  public static final int     DFS_NFS_REGISTRATION_PORT_DEFAULT = 40; // Currently unassigned.
-  public static final String  DFS_NFS_ALLOW_INSECURE_PORTS_KEY = "dfs.nfs.allow.insecure.ports";
-  public static final boolean DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT = true;
 
    // Slow io warning log threshold settings for dfsclient and datanode.
    public static final String DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY =

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java

@@ -338,6 +338,10 @@ final class AclStorage {
    * ACL, based on its access ACL entries.  For a correctly sorted ACL, the
    * first entry is the owner and the last 2 entries are the mask and other
    * entries respectively.  Also preserve sticky bit and toggle ACL bit on.
+   * Note that this method intentionally copies the permissions of the mask
+   * entry into the FsPermission group permissions.  This is consistent with the
+   * POSIX ACLs model, which presents the mask as the permissions of the group
+   * class.
    *
    * @param accessEntries List<AclEntry> access ACL entries
    * @param existingPerm FsPermission existing permissions

+ 51 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -1296,8 +1296,56 @@
     non-zero integer.
   </description>
 </property>
+
+<property>
+  <name>nfs.server.port</name>
+  <value>2049</value>
+  <description>
+      Specify the port number used by Hadoop NFS.
+  </description>
+</property>
+
+<property>
+  <name>nfs.mountd.port</name>
+  <value>4242</value>
+  <description>
+      Specify the port number used by Hadoop mount daemon.
+  </description>
+</property>
+
+<property>    
+  <name>nfs.dump.dir</name>
+  <value>/tmp/.hdfs-nfs</value>
+  <description>
+    This directory is used to temporarily save out-of-order writes before
+    writing to HDFS. For each file, the out-of-order writes are dumped after
+    they are accumulated to exceed certain threshold (e.g., 1MB) in memory. 
+    One needs to make sure the directory has enough space.
+  </description>
+</property>
+
+<property>
+  <name>nfs.rtmax</name>
+  <value>1048576</value>
+  <description>This is the maximum size in bytes of a READ request
+    supported by the NFS gateway. If you change this, make sure you
+    also update the nfs mount's rsize(add rsize= # of bytes to the 
+    mount directive).
+  </description>
+</property>
+
+<property>
+  <name>nfs.wtmax</name>
+  <value>1048576</value>
+  <description>This is the maximum size in bytes of a WRITE request
+    supported by the NFS gateway. If you change this, make sure you
+    also update the nfs mount's wsize(add wsize= # of bytes to the 
+    mount directive).
+  </description>
+</property>
+
 <property>
-  <name>dfs.nfs.keytab.file</name>
+  <name>nfs.keytab.file</name>
   <value></value>
   <description>
     *Note*: Advanced property. Change with caution.
@@ -1307,7 +1355,7 @@
 </property>
 
 <property>
-  <name>dfs.nfs.kerberos.principal</name>
+  <name>nfs.kerberos.principal</name>
   <value></value>
   <description>
     *Note*: Advanced property. Change with caution.
@@ -1318,7 +1366,7 @@
 </property>
 
 <property>
-  <name>dfs.nfs.allow.insecure.ports</name>
+  <name>nfs.allow.insecure.ports</name>
   <value>true</value>
   <description>
     When set to false, client connections originating from unprivileged ports

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js

@@ -45,7 +45,7 @@
     return function (jqxhr, text, err) {
       switch(jqxhr.status) {
         case 401:
-          var msg = '<p>Authentication failed when trying to open ' + url + ': Unauthrozied.</p>';
+          var msg = '<p>Authentication failed when trying to open ' + url + ': Unauthorized.</p>';
           break;
         case 403:
           if(jqxhr.responseJSON !== undefined && jqxhr.responseJSON.RemoteException !== undefined) {

+ 7 - 7
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm

@@ -76,14 +76,14 @@ HDFS NFS Gateway
 
 ----
   <property>
-    <name>dfs.nfs.keytab.file</name>
+    <name>nfs.keytab.file</name>
     <value>/etc/hadoop/conf/nfsserver.keytab</value> <!-- path to the nfs gateway keytab -->
   </property>
 ----
 
 ----
   <property>
-    <name>dfs.nfs.kerberos.principal</name>
+    <name>nfs.kerberos.principal</name>
     <value>nfsserver/_HOST@YOUR-REALM.COM</value>
   </property>
 ----
@@ -121,7 +121,7 @@ HDFS NFS Gateway
 
 ----
   <property>    
-    <name>dfs.nfs3.dump.dir</name>
+    <name>nfs.dump.dir</name>
     <value>/tmp/.hdfs-nfs</value>
   </property>
 ---- 
@@ -134,7 +134,7 @@ HDFS NFS Gateway
 
 ----
 <property>
-  <name>dfs.nfs.rtmax</name>
+  <name>nfs.rtmax</name>
   <value>1048576</value>
   <description>This is the maximum size in bytes of a READ request
     supported by the NFS gateway. If you change this, make sure you
@@ -146,7 +146,7 @@ HDFS NFS Gateway
 
 ----
 <property>
-  <name>dfs.nfs.wtmax</name>
+  <name>nfs.wtmax</name>
   <value>65536</value>
   <description>This is the maximum size in bytes of a WRITE request
     supported by the NFS gateway. If you change this, make sure you
@@ -167,7 +167,7 @@ HDFS NFS Gateway
 
 ----
 <property>
-  <name>dfs.nfs.exports.allowed.hosts</name>
+  <name>nfs.exports.allowed.hosts</name>
   <value>* rw</value>
 </property>
 ----
@@ -345,7 +345,7 @@ HDFS NFS Gateway
   file in the event one wishes to access the HDFS NFS Gateway from a system with
   a completely disparate set of UIDs/GIDs. By default this file is located at
   "/etc/nfs.map", but a custom location can be configured by setting the
-  "dfs.nfs.static.mapping.file" property to the path of the static mapping file.
+  "nfs.static.mapping.file" property to the path of the static mapping file.
   The format of the static mapping file is similar to what is described in the
   exports(5) manual page, but roughly it is:
 

+ 12 - 0
hadoop-yarn-project/CHANGES.txt

@@ -29,6 +29,9 @@ Release 2.5.0 - UNRELEASED
     YARN-1362. Distinguish between nodemanager shutdown for decommission vs shutdown 
     for restart. (Jason Lowe via junping_du)
 
+    YARN-1338. Recover localized resource cache state upon nodemanager restart 
+    (Jason Lowe via junping_du)
+
   IMPROVEMENTS
 
     YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via
@@ -117,6 +120,12 @@ Release 2.5.0 - UNRELEASED
     YARN-596. Use scheduling policies throughout the queue hierarchy to decide
     which containers to preempt (Wei Yan via Sandy Ryza)
 
+    YARN-2054. Better defaults for YARN ZK configs for retries and retry-interval
+    when HA is enabled. (kasha)
+
+    YARN-2010. Document yarn.resourcemanager.zk-auth and its scope. 
+    (Robert Kanter via kasha)
+
   OPTIMIZATIONS
 
   BUG FIXES 
@@ -162,6 +171,9 @@ Release 2.5.0 - UNRELEASED
     that both Timeline Server and client can access them. (Zhijie Shen via
     vinodkv)
 
+    YARN-1868. YARN status web ui does not show correctly in IE 11.
+    (Chuan Liu via cnauroth)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -324,11 +324,11 @@ public class YarnConfiguration extends Configuration {
   public static final String RM_ZK_ADDRESS = RM_ZK_PREFIX + "address";
 
   public static final String RM_ZK_NUM_RETRIES = RM_ZK_PREFIX + "num-retries";
-  public static final int DEFAULT_ZK_RM_NUM_RETRIES = 500;
+  public static final int DEFAULT_ZK_RM_NUM_RETRIES = 1000;
 
   public static final String RM_ZK_RETRY_INTERVAL_MS =
       RM_ZK_PREFIX + "retry-interval-ms";
-  public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 2000;
+  public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 1000;
 
   public static final String RM_ZK_TIMEOUT_MS = RM_ZK_PREFIX + "timeout-ms";
   public static final int DEFAULT_RM_ZK_TIMEOUT_MS = 10000;

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java

@@ -79,7 +79,8 @@ public abstract class HtmlPage extends TextView {
   @Override
   public void render() {
     puts(DOCTYPE);
-    render(page().html().meta_http("Content-type", MimeType.HTML));
+    render(page().html().meta_http("X-UA-Compatible", "IE=8")
+        .meta_http("Content-type", MimeType.HTML));
     if (page().nestLevel() != 0) {
       throw new WebAppException("Error rendering page: nestLevel="+
                                 page().nestLevel());

+ 17 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -309,14 +309,17 @@
   <property>
     <description>Number of times RM tries to connect to ZooKeeper.</description>
     <name>yarn.resourcemanager.zk-num-retries</name>
-    <value>500</value>
+    <value>1000</value>
   </property>
 
   <property>
     <description>Retry interval in milliseconds when connecting to ZooKeeper.
+      When HA is enabled, the value here is NOT used. It is generated
+      automatically from yarn.resourcemanager.zk-timeout-ms and
+      yarn.resourcemanager.zk-num-retries.
     </description>
     <name>yarn.resourcemanager.zk-retry-interval-ms</name>
-    <value>2000</value>
+    <value>1000</value>
   </property>
 
   <property>
@@ -369,6 +372,18 @@
     <name>yarn.resourcemanager.zk-state-store.root-node.acl</name>
   </property>
 
+  <property>
+    <description>
+        Specify the auths to be used for the ACLs specified in both the
+        yarn.resourcemanager.zk-acl and
+        yarn.resourcemanager.zk-state-store.root-node.acl properties.  This
+        takes a comma-separated list of authentication mechanisms, each of the
+        form 'scheme:auth' (the same syntax used for the 'addAuth' command in
+        the ZK CLI).
+    </description>
+    <name>yarn.resourcemanager.zk-auth</name>
+  </property>
+
   <property>
     <description>URI pointing to the location of the FileSystem path where
     RM state will be stored. This must be supplied when using

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestSubViews.java

@@ -70,6 +70,6 @@ public class TestSubViews {
     out.flush();
     verify(out).print("sub1 text");
     verify(out).print("sub2 text");
-    verify(out, times(15)).println(); // test inline transition across views
+    verify(out, times(16)).println(); // test inline transition across views
   }
 }

+ 8 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/view/TestHtmlPage.java

@@ -22,11 +22,12 @@ import com.google.inject.Injector;
 
 import java.io.PrintWriter;
 
+import org.apache.hadoop.yarn.webapp.MimeType;
 import org.apache.hadoop.yarn.webapp.WebAppException;
 import org.apache.hadoop.yarn.webapp.test.WebAppTests;
 import org.apache.hadoop.yarn.webapp.view.HtmlPage;
-
 import org.junit.Test;
+
 import static org.mockito.Mockito.*;
 
 public class TestHtmlPage {
@@ -53,6 +54,12 @@ public class TestHtmlPage {
     Injector injector = WebAppTests.testPage(TestView.class);
     PrintWriter out = injector.getInstance(PrintWriter.class);
 
+    // Verify the HTML page has correct meta tags in the header
+    verify(out).print(" http-equiv=\"X-UA-Compatible\"");
+    verify(out).print(" content=\"IE=8\"");
+    verify(out).print(" http-equiv=\"Content-type\"");
+    verify(out).print(String.format(" content=\"%s\"", MimeType.HTML));
+
     verify(out).print("test");
     verify(out).print(" id=\"testid\"");
     verify(out).print("test note");

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml

@@ -156,6 +156,10 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-common</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.fusesource.leveldbjni</groupId>
+      <artifactId>leveldbjni-all</artifactId>
+    </dependency>
   </dependencies>
 
   <profiles>
@@ -292,6 +296,7 @@
               <source>
                 <directory>${basedir}/src/main/proto</directory>
                 <includes>
+		  <include>yarn_server_nodemanager_recovery.proto</include>
                   <include>yarn_server_nodemanager_service_protos.proto</include>
                   <include>LocalizationProtocol.proto</include>
                 </includes>

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
@@ -67,6 +68,8 @@ public interface Context {
 
   ApplicationACLsManager getApplicationACLsManager();
 
+  NMStateStoreService getNMStateStore();
+
   boolean getDecommissioned();
 
   void setDecommissioned(boolean isDecommissioned);

+ 49 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -53,6 +53,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManag
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer;
@@ -78,6 +81,7 @@ public class NodeManager extends CompositeService
   private ContainerManagerImpl containerManager;
   private NodeStatusUpdater nodeStatusUpdater;
   private static CompositeServiceShutdownHook nodeManagerShutdownHook; 
+  private NMStateStoreService nmStore = null;
   
   private AtomicBoolean isStopping = new AtomicBoolean(false);
   
@@ -115,9 +119,10 @@ public class NodeManager extends CompositeService
 
   protected NMContext createNMContext(
       NMContainerTokenSecretManager containerTokenSecretManager,
-      NMTokenSecretManagerInNM nmTokenSecretManager) {
+      NMTokenSecretManagerInNM nmTokenSecretManager,
+      NMStateStoreService stateStore) {
     return new NMContext(containerTokenSecretManager, nmTokenSecretManager,
-        dirsHandler, aclsManager);
+        dirsHandler, aclsManager, stateStore);
   }
 
   protected void doSecureLogin() throws IOException {
@@ -125,11 +130,8 @@ public class NodeManager extends CompositeService
         YarnConfiguration.NM_PRINCIPAL);
   }
 
-  @Override
-  protected void serviceInit(Configuration conf) throws Exception {
-
-    conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
-
+  private void initAndStartRecoveryStore(Configuration conf)
+      throws IOException {
     boolean recoveryEnabled = conf.getBoolean(
         YarnConfiguration.NM_RECOVERY_ENABLED,
         YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED);
@@ -142,7 +144,36 @@ public class NodeManager extends CompositeService
       }
       Path recoveryRoot = new Path(recoveryDirName);
       recoveryFs.mkdirs(recoveryRoot, new FsPermission((short)0700));
+      nmStore = new NMLeveldbStateStoreService();
+    } else {
+      nmStore = new NMNullStateStoreService();
+    }
+    nmStore.init(conf);
+    nmStore.start();
+  }
+
+  /**
+   * Stops the recovery state store and, when this node has been
+   * decommissioned and the store supports recovery, deletes the local
+   * recovery directory named by {@link YarnConfiguration#NM_RECOVERY_DIR}.
+   *
+   * Does nothing when the store was never created. The store field starts
+   * out null and is only assigned by initAndStartRecoveryStore, so a stop
+   * that follows a failed initialization must not dereference it.
+   *
+   * @throws IOException if the local file system cannot be obtained or the
+   *           recursive delete fails with an I/O error
+   */
+  private void stopRecoveryStore() throws IOException {
+    if (nmStore == null) {
+      // the store was never created, so there is nothing to stop or remove
+      return;
+    }
+    nmStore.stop();
+    // serviceInit assigns the context after the store has been started, so
+    // the context may still be null if initialization failed in between
+    if (context == null
+        || !context.getDecommissioned() || !nmStore.canRecover()) {
+      return;
+    }
+    Configuration conf = getConfig();
+    Path recoveryRoot = new Path(
+        conf.get(YarnConfiguration.NM_RECOVERY_DIR));
+    LOG.info("Removing state store at " + recoveryRoot
+        + " due to decommission");
+    FileSystem recoveryFs = FileSystem.getLocal(conf);
+    if (!recoveryFs.delete(recoveryRoot, true)) {
+      LOG.warn("Unable to delete " + recoveryRoot);
+    }
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+
+    conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
+
+    initAndStartRecoveryStore(conf);
 
     NMContainerTokenSecretManager containerTokenSecretManager =
         new NMContainerTokenSecretManager(conf);
@@ -171,7 +202,7 @@ public class NodeManager extends CompositeService
     dirsHandler = nodeHealthChecker.getDiskHandler();
 
     this.context = createNMContext(containerTokenSecretManager,
-        nmTokenSecretManager);
+        nmTokenSecretManager, nmStore);
     
     nodeStatusUpdater =
         createNodeStatusUpdater(context, dispatcher, nodeHealthChecker);
@@ -220,6 +251,7 @@ public class NodeManager extends CompositeService
       return;
     }
     super.serviceStop();
+    stopRecoveryStore();
     DefaultMetricsSystem.shutdown();
   }
 
@@ -272,11 +304,13 @@ public class NodeManager extends CompositeService
     private WebServer webServer;
     private final NodeHealthStatus nodeHealthStatus = RecordFactoryProvider
         .getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
+    private final NMStateStoreService stateStore;
     private boolean isDecommissioned = false;
 
     public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
         NMTokenSecretManagerInNM nmTokenSecretManager,
-        LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager) {
+        LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager,
+        NMStateStoreService stateStore) {
       this.containerTokenSecretManager = containerTokenSecretManager;
       this.nmTokenSecretManager = nmTokenSecretManager;
       this.dirsHandler = dirsHandler;
@@ -284,6 +318,7 @@ public class NodeManager extends CompositeService
       this.nodeHealthStatus.setIsNodeHealthy(true);
       this.nodeHealthStatus.setHealthReport("Healthy");
       this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
+      this.stateStore = stateStore;
     }
 
     /**
@@ -351,6 +386,11 @@ public class NodeManager extends CompositeService
       return aclsManager;
     }
 
+    // Returns the state store that was handed to the NMContext constructor.
+    @Override
+    public NMStateStoreService getNMStateStore() {
+      return stateStore;
+    }
+
     @Override
     public boolean getDecommissioned() {
       return isDecommissioned;

+ 12 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -22,6 +22,7 @@ import static org.apache.hadoop.service.Service.STATE.STARTED;
 
 import java.io.IOException;
 import java.net.InetSocketAddress;
+import java.net.URISyntaxException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -116,6 +117,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.Contai
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.authorize.NMPolicyProvider;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -218,6 +220,15 @@ public class ContainerManagerImpl extends CompositeService implements
         SHUTDOWN_CLEANUP_SLOP_MS;
 
     super.serviceInit(conf);
+    recover();
+  }
+
+  /**
+   * Restores previously localized resources from the NM state store.
+   * Nothing is loaded when the store reports that it cannot recover.
+   *
+   * @throws IOException declared for the state store load
+   * @throws URISyntaxException declared for the localization service
+   *           recovery call
+   */
+  private void recover() throws IOException, URISyntaxException {
+    NMStateStoreService store = context.getNMStateStore();
+    if (!store.canRecover()) {
+      return;
+    }
+    rsrcLocalizationSrvc.recoverLocalizedResources(
+        store.loadLocalizationState());
   }
 
   protected LogHandler createLogHandler(Configuration conf, Context context,
@@ -239,7 +250,7 @@ public class ContainerManagerImpl extends CompositeService implements
   protected ResourceLocalizationService createResourceLocalizationService(
       ContainerExecutor exec, DeletionService deletionContext) {
     return new ResourceLocalizationService(this.dispatcher, exec,
-        deletionContext, dirsHandler);
+        deletionContext, dirsHandler, context.getNMStateStore());
   }
 
   protected ContainersLauncher createContainersLauncher(Context context,

+ 77 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java

@@ -26,6 +26,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * {@link LocalCacheDirectoryManager} is used for managing hierarchical
  * directories for local cache. It will allow to restrict the number of files in
@@ -99,6 +101,57 @@ public class LocalCacheDirectoryManager {
     }
   }
 
+  /**
+   * Records one more file in the given sub directory of the cache. A
+   * directory that is not yet known is created from its relative path and
+   * registered as known and non-full before the count is incremented.
+   * Once the count reaches the per-directory file limit the directory is
+   * dropped from the non-full set.
+   * 
+   * @param relPath the relative path of the directory within the cache;
+   *          null is treated as the empty path and the value is trimmed
+   */
+  public synchronized void incrementFileCountForPath(String relPath) {
+    String key = (relPath == null) ? "" : relPath.trim();
+    Directory dir = knownDirectories.get(key);
+    if (dir == null) {
+      int dirNumber = Directory.getDirectoryNumber(key);
+      totalSubDirectories = Math.max(dirNumber, totalSubDirectories);
+      dir = new Directory(dirNumber);
+      nonFullDirectories.add(dir);
+      knownDirectories.put(dir.getRelativePath(), dir);
+    }
+    int updatedCount = dir.incrementAndGetCount();
+    if (updatedCount >= perDirectoryFileLimit) {
+      nonFullDirectories.remove(dir);
+    }
+  }
+
+  /**
+   * Finds the root of the local cache tree that contains a directory.
+   * Walks from the given path towards its parents for as long as the last
+   * path component is a single character that parses as a digit in radix
+   * DIRECTORIES_PER_LEVEL, and returns the first path whose name is not
+   * such a digit.
+   * 
+   * @param path the directory within a cache directory
+   * @return the local cache directory root or null if not found
+   */
+  public static Path getCacheDirectoryRoot(Path path) {
+    Path candidate = path;
+    while (candidate != null) {
+      String name = candidate.getName();
+      boolean isLevelDir = false;
+      if (name.length() == 1) {
+        try {
+          isLevelDir = Integer.parseInt(name, DIRECTORIES_PER_LEVEL)
+              < DIRECTORIES_PER_LEVEL;
+        } catch (NumberFormatException e) {
+          // not a digit in this radix, so this is not a level directory
+        }
+      }
+      if (!isLevelDir) {
+        return candidate;
+      }
+      candidate = candidate.getParent();
+    }
+    return null;
+  }
+
+  /**
+   * Looks up the tracked {@link Directory} for a relative path.
+   * 
+   * @param relPath the relative path used as the key in knownDirectories
+   * @return the value stored under that key in knownDirectories
+   */
+  @VisibleForTesting
+  synchronized Directory getDirectory(String relPath) {
+    return knownDirectories.get(relPath);
+  }
+
   /*
    * It limits the number of files and sub directories in the directory to the
    * limit LocalCacheDirectoryManager#perDirectoryFileLimit.
@@ -108,11 +161,9 @@ public class LocalCacheDirectoryManager {
     private final String relativePath;
     private int fileCount;
 
-    public Directory(int directoryNo) {
-      fileCount = 0;
-      if (directoryNo == 0) {
-        relativePath = "";
-      } else {
+    static String getRelativePath(int directoryNo) {
+      String relativePath = "";
+      if (directoryNo > 0) {
         String tPath = Integer.toString(directoryNo - 1, DIRECTORIES_PER_LEVEL);
         StringBuffer sb = new StringBuffer();
         if (tPath.length() == 1) {
@@ -128,6 +179,27 @@ public class LocalCacheDirectoryManager {
         }
         relativePath = sb.toString();
       }
+      return relativePath;
+    }
+
+    /**
+     * Converts a relative directory path back into its directory number.
+     * The empty path maps to 0. Otherwise the path separators are removed
+     * and the remaining characters are parsed as a number in radix
+     * DIRECTORIES_PER_LEVEL, plus one.
+     *
+     * @param relativePath the relative path of the directory
+     * @return the directory number for the path
+     * @throws NumberFormatException if the path does not parse as a number
+     *           in radix DIRECTORIES_PER_LEVEL
+     */
+    static int getDirectoryNumber(String relativePath) {
+      if (relativePath.isEmpty()) {
+        return 0;
+      }
+      String digits = relativePath.replace("/", "");
+      if (digits.length() > 1) {
+        // undo step from getRelativePath() to reuse 0th sub directory
+        int leading = Integer.parseInt(digits.substring(0, 1),
+            DIRECTORIES_PER_LEVEL);
+        digits = Integer.toString(leading + 1, DIRECTORIES_PER_LEVEL)
+            + digits.substring(1);
+      }
+      return Integer.parseInt(digits, DIRECTORIES_PER_LEVEL) + 1;
+    }
+
+    /**
+     * Creates a directory entry with a file count of zero whose relative
+     * path is derived from the directory number by getRelativePath(int).
+     *
+     * @param directoryNo the number of this directory
+     */
+    public Directory(int directoryNo) {
+      fileCount = 0;
+      relativePath = getRelativePath(directoryNo);
     }
 
     public int incrementAndGetCount() {

+ 0 - 10
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java

@@ -18,15 +18,12 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
-import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent;
 
-import com.google.common.annotations.VisibleForTesting;
-
 /**
  * Component tracking resources all of the same {@link LocalResourceVisibility}
  * 
@@ -34,18 +31,11 @@ import com.google.common.annotations.VisibleForTesting;
 interface LocalResourcesTracker
     extends EventHandler<ResourceEvent>, Iterable<LocalizedResource> {
 
-  // TODO: Not used at all!!
-  boolean contains(LocalResourceRequest resource);
-
   boolean remove(LocalizedResource req, DeletionService delService);
 
   Path getPathForLocalization(LocalResourceRequest req, Path localDirPath);
 
   String getUser();
 
-  long nextUniqueNumber();
-  
-  @VisibleForTesting
-  @Private
   LocalizedResource getLocalizedResource(LocalResourceRequest request);
 }

+ 156 - 29
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.Iterator;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -27,14 +28,21 @@ import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -53,6 +61,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       .compile(RANDOM_DIR_REGEX);
 
   private final String user;
+  private final ApplicationId appId;
   private final Dispatcher dispatcher;
   private final ConcurrentMap<LocalResourceRequest,LocalizedResource> localrsrc;
   private Configuration conf;
@@ -77,17 +86,22 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
    * per APPLICATION, USER and PUBLIC cache.
    */
   private AtomicLong uniqueNumberGenerator = new AtomicLong(9);
+  private NMStateStoreService stateStore;
 
-  public LocalResourcesTrackerImpl(String user, Dispatcher dispatcher,
-      boolean useLocalCacheDirectoryManager, Configuration conf) {
-    this(user, dispatcher,
+  public LocalResourcesTrackerImpl(String user, ApplicationId appId,
+      Dispatcher dispatcher, boolean useLocalCacheDirectoryManager,
+      Configuration conf, NMStateStoreService stateStore) {
+    this(user, appId, dispatcher,
       new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>(),
-      useLocalCacheDirectoryManager, conf);
+      useLocalCacheDirectoryManager, conf, stateStore);
   }
 
-  LocalResourcesTrackerImpl(String user, Dispatcher dispatcher,
+  LocalResourcesTrackerImpl(String user, ApplicationId appId,
+      Dispatcher dispatcher,
       ConcurrentMap<LocalResourceRequest,LocalizedResource> localrsrc,
-      boolean useLocalCacheDirectoryManager, Configuration conf) {
+      boolean useLocalCacheDirectoryManager, Configuration conf,
+      NMStateStoreService stateStore) {
+    this.appId = appId;
     this.user = user;
     this.dispatcher = dispatcher;
     this.localrsrc = localrsrc;
@@ -98,6 +112,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
         new ConcurrentHashMap<LocalResourceRequest, Path>();
     }
     this.conf = conf;
+    this.stateStore = stateStore;
   }
 
   /*
@@ -119,8 +134,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       if (rsrc != null && (!isResourcePresent(rsrc))) {
         LOG.info("Resource " + rsrc.getLocalPath()
             + " is missing, localizing it again");
-        localrsrc.remove(req);
-        decrementFileCountForLocalCacheDirectory(req, rsrc);
+        removeResource(req);
         rsrc = null;
       }
       if (null == rsrc) {
@@ -141,15 +155,102 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       }
       break;
     case LOCALIZATION_FAILED:
-      decrementFileCountForLocalCacheDirectory(req, null);
       /*
        * If resource localization fails then Localized resource will be
        * removed from local cache.
        */
-      localrsrc.remove(req);
+      removeResource(req);
+      break;
+    case RECOVERED:
+      if (rsrc != null) {
+        LOG.warn("Ignoring attempt to recover existing resource " + rsrc);
+        return;
+      }
+      rsrc = recoverResource(req, (ResourceRecoveredEvent) event);
+      localrsrc.put(req, rsrc);
       break;
     }
+
     rsrc.handle(event);
+
+    if (event.getType() == ResourceEventType.LOCALIZED) {
+      if (rsrc.getLocalPath() != null) {
+        try {
+          stateStore.finishResourceLocalization(user, appId,
+              buildLocalizedResourceProto(rsrc));
+        } catch (IOException ioe) {
+          LOG.error("Error storing resource state for " + rsrc, ioe);
+        }
+      } else {
+        LOG.warn("Resource " + rsrc + " localized without a location");
+      }
+    }
+  }
+
+  /**
+   * Creates the tracker entry for a resource reported by a recovery event.
+   * The ID generator is raised to at least the recovered resource's number
+   * and the file count of the enclosing cache directory is incremented.
+   *
+   * @param req the request identifying the recovered resource
+   * @param event the recovery event carrying the resource's local path
+   * @return a new LocalizedResource for the request
+   */
+  private LocalizedResource recoverResource(LocalResourceRequest req,
+      ResourceRecoveredEvent event) {
+    // the parent directory of the resource is named after its unique number
+    Path rsrcDir = event.getLocalPath().getParent();
+    long recoveredId = Long.parseLong(rsrcDir.getName());
+
+    // retry until the generator holds max(current, recovered) so that new
+    // resources do not collide with the recovered one
+    long observedId;
+    do {
+      observedId = uniqueNumberGenerator.get();
+    } while (!uniqueNumberGenerator.compareAndSet(observedId,
+        Math.max(observedId, recoveredId)));
+
+    incrementFileCountForLocalCacheDirectory(rsrcDir.getParent());
+
+    return new LocalizedResource(req, dispatcher);
+  }
+
+  /**
+   * Builds the state store record for a localized resource from its
+   * request, local path and size.
+   *
+   * @param rsrc the localized resource; its local path must not be null
+   * @return the protobuf record describing the resource
+   */
+  private LocalizedResourceProto buildLocalizedResourceProto(
+      LocalizedResource rsrc) {
+    LocalizedResourceProto.Builder builder =
+        LocalizedResourceProto.newBuilder();
+    builder.setResource(buildLocalResourceProto(rsrc.getRequest()));
+    builder.setLocalPath(rsrc.getLocalPath().toString());
+    builder.setSize(rsrc.getSize());
+    return builder.build();
+  }
+
+  /**
+   * Returns the protobuf form of a local resource. A resource that is not
+   * already a LocalResourcePBImpl is first copied through
+   * LocalResource.newInstance and the copy is cast to LocalResourcePBImpl.
+   *
+   * @param lr the local resource to convert
+   * @return the protobuf message for the resource
+   */
+  private LocalResourceProto buildLocalResourceProto(LocalResource lr) {
+    LocalResource pbCandidate = lr;
+    if (!(pbCandidate instanceof LocalResourcePBImpl)) {
+      pbCandidate = LocalResource.newInstance(lr.getResource(),
+          lr.getType(), lr.getVisibility(), lr.getSize(),
+          lr.getTimestamp(), lr.getPattern());
+    }
+    return ((LocalResourcePBImpl) pbCandidate).getProto();
+  }
+
+  /**
+   * Increments the file count that the cache directory manager keeps for
+   * the given directory. Does nothing when directory managers are not in
+   * use or when no cache root is found for the directory. A manager for
+   * the cache root is created on demand.
+   *
+   * @param cacheDir a directory inside a local cache tree
+   */
+  public void incrementFileCountForLocalCacheDirectory(Path cacheDir) {
+    if (!useLocalCacheDirectoryManager) {
+      return;
+    }
+    Path cacheRoot =
+        LocalCacheDirectoryManager.getCacheDirectoryRoot(cacheDir);
+    if (cacheRoot == null) {
+      return;
+    }
+
+    LocalCacheDirectoryManager mgr = directoryManagers.get(cacheRoot);
+    if (mgr == null) {
+      LocalCacheDirectoryManager created =
+          new LocalCacheDirectoryManager(conf);
+      // keep whichever manager is already registered for this root
+      LocalCacheDirectoryManager raced =
+          directoryManagers.putIfAbsent(cacheRoot, created);
+      mgr = (raced != null) ? raced : created;
+    }
+
+    // the root itself is counted under the empty relative path
+    String relPath = "";
+    if (!cacheDir.equals(cacheRoot)) {
+      String dirStr = cacheDir.toUri().getRawPath();
+      String rootStr = cacheRoot.toUri().getRawPath();
+      relPath = dirStr.substring(rootStr.length() + 1);
+    }
+    mgr.incrementFileCountForPath(relPath);
+  }
 
   /*
@@ -216,11 +317,6 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
     return ret;
   }
   
-  @Override
-  public boolean contains(LocalResourceRequest resource) {
-    return localrsrc.containsKey(resource);
-  }
-
   @Override
   public boolean remove(LocalizedResource rem, DeletionService delService) {
  // current synchronization guaranteed by crude RLS event for cleanup
@@ -237,16 +333,31 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
           + " with non-zero refcount");
       return false;
     } else { // ResourceState is LOCALIZED or INIT
-      localrsrc.remove(rem.getRequest());
       if (ResourceState.LOCALIZED.equals(rsrc.getState())) {
         delService.delete(getUser(), getPathToDelete(rsrc.getLocalPath()));
       }
-      decrementFileCountForLocalCacheDirectory(rem.getRequest(), rsrc);
+      removeResource(rem.getRequest());
       LOG.info("Removed " + rsrc.getLocalPath() + " from localized cache");
       return true;
     }
   }
 
+  /**
+   * Drops a resource from the tracker, decrements the file count of its
+   * cache directory and, when the resource had a local path, removes its
+   * record from the state store. A state store failure is logged and not
+   * rethrown.
+   *
+   * @param req the request identifying the resource to remove
+   */
+  private void removeResource(LocalResourceRequest req) {
+    LocalizedResource removed = localrsrc.remove(req);
+    decrementFileCountForLocalCacheDirectory(req, removed);
+    if (removed == null) {
+      return;
+    }
+    Path storedPath = removed.getLocalPath();
+    if (storedPath == null) {
+      return;
+    }
+    try {
+      stateStore.removeLocalizedResource(user, appId, storedPath);
+    } catch (IOException e) {
+      LOG.error("Unable to remove resource " + removed + " from state store",
+          e);
+    }
+  }
+
   /**
    * Returns the path up to the random directory component.
    */
@@ -285,6 +396,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
   @Override
   public Path
       getPathForLocalization(LocalResourceRequest req, Path localDirPath) {
+    Path rPath = localDirPath;
     if (useLocalCacheDirectoryManager && localDirPath != null) {
 
       if (!directoryManagers.containsKey(localDirPath)) {
@@ -293,7 +405,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       }
       LocalCacheDirectoryManager dir = directoryManagers.get(localDirPath);
 
-      Path rPath = localDirPath;
+      rPath = localDirPath;
       String hierarchicalPath = dir.getRelativePathForLocalization();
       // For most of the scenarios we will get root path only which
       // is an empty string
@@ -301,21 +413,36 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
         rPath = new Path(localDirPath, hierarchicalPath);
       }
       inProgressLocalResourcesMap.put(req, rPath);
-      return rPath;
-    } else {
-      return localDirPath;
     }
-  }
 
-  @Override
-  public long nextUniqueNumber() {
-    return uniqueNumberGenerator.incrementAndGet();
+    rPath = new Path(rPath,
+        Long.toString(uniqueNumberGenerator.incrementAndGet()));
+    Path localPath = new Path(rPath, req.getPath().getName());
+    LocalizedResource rsrc = localrsrc.get(req);
+    rsrc.setLocalPath(localPath);
+    LocalResource lr = LocalResource.newInstance(req.getResource(),
+        req.getType(), req.getVisibility(), req.getSize(),
+        req.getTimestamp());
+    try {
+      stateStore.startResourceLocalization(user, appId,
+          ((LocalResourcePBImpl) lr).getProto(), localPath);
+    } catch (IOException e) {
+      LOG.error("Unable to record localization start for " + rsrc, e);
+    }
+    return rPath;
   }
 
-  @VisibleForTesting
-  @Private
   @Override
   public LocalizedResource getLocalizedResource(LocalResourceRequest request) {
     return localrsrc.get(request);
   }
-}
+
+  /**
+   * Returns the cache directory manager registered for a local directory,
+   * or null when directory managers are not in use.
+   *
+   * @param localDirPath the key to look up in directoryManagers
+   */
+  @VisibleForTesting
+  LocalCacheDirectoryManager getDirectoryManager(Path localDirPath) {
+    return useLocalCacheDirectoryManager
+        ? directoryManagers.get(localDirPath)
+        : null;
+  }
+}

+ 20 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent;
 import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
@@ -54,8 +55,8 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
 
   private static final Log LOG = LogFactory.getLog(LocalizedResource.class);
 
-  Path localPath;
-  long size = -1;
+  volatile Path localPath;
+  volatile long size = -1;
   final LocalResourceRequest rsrc;
   final Dispatcher dispatcher;
   final StateMachine<ResourceState,ResourceEventType,ResourceEvent>
@@ -76,6 +77,8 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
     // From INIT (ref == 0, awaiting req)
     .addTransition(ResourceState.INIT, ResourceState.DOWNLOADING,
         ResourceEventType.REQUEST, new FetchResourceTransition())
+    .addTransition(ResourceState.INIT, ResourceState.LOCALIZED,
+        ResourceEventType.RECOVERED, new RecoveredTransition())
 
     // From DOWNLOADING (ref > 0, may be localizing)
     .addTransition(ResourceState.DOWNLOADING, ResourceState.DOWNLOADING,
@@ -157,6 +160,10 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
     return localPath;
   }
 
+  /**
+   * Sets the local path of this resource. The value stored is the result
+   * of Path.getPathWithoutSchemeAndAuthority on the given path.
+   *
+   * @param localPath the location of the resource
+   */
+  public void setLocalPath(Path localPath) {
+    this.localPath = Path.getPathWithoutSchemeAndAuthority(localPath);
+  }
+
   public long getTimestamp() {
     return timestamp.get();
   }
@@ -234,7 +241,8 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
     @Override
     public void transition(LocalizedResource rsrc, ResourceEvent event) {
       ResourceLocalizedEvent locEvent = (ResourceLocalizedEvent) event;
-      rsrc.localPath = locEvent.getLocation();
+      rsrc.localPath =
+          Path.getPathWithoutSchemeAndAuthority(locEvent.getLocation());
       rsrc.size = locEvent.getSize();
       for (ContainerId container : rsrc.ref) {
         rsrc.dispatcher.getEventHandler().handle(
@@ -291,4 +299,13 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
       rsrc.release(relEvent.getContainer());
     }
   }
+
+  /**
+   * Transition taken from INIT to LOCALIZED on a RECOVERED event. Copies
+   * the local path and size carried by the event onto the resource.
+   */
+  private static class RecoveredTransition extends ResourceTransition {
+    @Override
+    public void transition(LocalizedResource rsrc, ResourceEvent event) {
+      ResourceRecoveredEvent recovered = (ResourceRecoveredEvent) event;
+      rsrc.localPath = recovered.getLocalPath();
+      rsrc.size = recovered.getSize();
+    }
+  }
 }

+ 111 - 33
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java

@@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -81,6 +82,8 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService.FileDeletionTask;
@@ -109,10 +112,15 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenSecretManager;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredLocalizationState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredUserResources;
 import org.apache.hadoop.yarn.server.nodemanager.security.authorize.NMPolicyProvider;
 import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerBuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -142,6 +150,7 @@ public class ResourceLocalizationService extends CompositeService
   private RecordFactory recordFactory;
   private final ScheduledExecutorService cacheCleanup;
   private LocalizerTokenSecretManager secretManager;
+  private NMStateStoreService stateStore;
 
   private LocalResourcesTracker publicRsrc;
 
@@ -163,7 +172,7 @@ public class ResourceLocalizationService extends CompositeService
 
   public ResourceLocalizationService(Dispatcher dispatcher,
       ContainerExecutor exec, DeletionService delService,
-      LocalDirsHandlerService dirsHandler) {
+      LocalDirsHandlerService dirsHandler, NMStateStoreService stateStore) {
 
     super(ResourceLocalizationService.class.getName());
     this.exec = exec;
@@ -175,6 +184,7 @@ public class ResourceLocalizationService extends CompositeService
         new ThreadFactoryBuilder()
           .setNameFormat("ResourceLocalizationService Cache Cleanup")
           .build());
+    this.stateStore = stateStore;
   }
 
   FileContext getLocalFileContext(Configuration conf) {
@@ -203,15 +213,17 @@ public class ResourceLocalizationService extends CompositeService
   @Override
   public void serviceInit(Configuration conf) throws Exception {
     this.validateConf(conf);
-    this.publicRsrc =
-        new LocalResourcesTrackerImpl(null, dispatcher, true, conf);
+    this.publicRsrc = new LocalResourcesTrackerImpl(null, null, dispatcher,
+        true, conf, stateStore);
     this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
 
     try {
       FileContext lfs = getLocalFileContext(conf);
       lfs.setUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
 
-      cleanUpLocalDir(lfs,delService);
+      if (!stateStore.canRecover()) {
+        cleanUpLocalDir(lfs,delService);
+      }
 
       List<String> localDirs = dirsHandler.getLocalDirs();
       for (String localDir : localDirs) {
@@ -249,6 +261,74 @@ public class ResourceLocalizationService extends CompositeService
     super.serviceInit(conf);
   }
 
+  /**
+   * Recover localized resources after an NM restart.
+   * Public resources are replayed into the public tracker; each user's
+   * private and per-application resources are replayed into trackers that
+   * are created on demand (or reused when one is already registered).
+   *
+   * @param state localization state loaded from the state store
+   * @throws URISyntaxException if a recovered resource cannot be converted
+   *         into a resource request
+   */
+  public void recoverLocalizedResources(RecoveredLocalizationState state)
+      throws URISyntaxException {
+    recoverTrackerResources(publicRsrc, state.getPublicTrackerState());
+
+    for (Map.Entry<String, RecoveredUserResources> userEntry :
+         state.getUserResources().entrySet()) {
+      final String user = userEntry.getKey();
+      final RecoveredUserResources recovered = userEntry.getValue();
+
+      // user-private (filecache) resources
+      LocalResourceTrackerState privateState =
+          recovered.getPrivateTrackerState();
+      if (!privateState.isEmpty()) {
+        LocalResourcesTracker created = new LocalResourcesTrackerImpl(user,
+            null, dispatcher, true, super.getConfig(), stateStore);
+        LocalResourcesTracker existing =
+            privateRsrc.putIfAbsent(user, created);
+        recoverTrackerResources(existing != null ? existing : created,
+            privateState);
+      }
+
+      // application-scoped (appcache) resources
+      for (Map.Entry<ApplicationId, LocalResourceTrackerState> appEntry :
+           recovered.getAppTrackerStates().entrySet()) {
+        LocalResourceTrackerState appState = appEntry.getValue();
+        if (appState.isEmpty()) {
+          continue;
+        }
+        ApplicationId appId = appEntry.getKey();
+        String appIdStr = ConverterUtils.toString(appId);
+        LocalResourcesTracker created = new LocalResourcesTrackerImpl(user,
+            appId, dispatcher, false, super.getConfig(), stateStore);
+        LocalResourcesTracker existing =
+            appRsrc.putIfAbsent(appIdStr, created);
+        recoverTrackerResources(existing != null ? existing : created,
+            appState);
+      }
+    }
+  }
+
+  /**
+   * Replay one tracker's recovered state into the given tracker.
+   * Completed resources are announced with their recorded path and size.
+   * In-progress resources are announced with size 0 and then immediately
+   * removed so that containers request them again.
+   */
+  private void recoverTrackerResources(LocalResourcesTracker tracker,
+      LocalResourceTrackerState state) throws URISyntaxException {
+    for (LocalizedResourceProto completed : state.getLocalizedResources()) {
+      LocalResourceRequest request = new LocalResourceRequest(
+          new LocalResourcePBImpl(completed.getResource()));
+      LOG.info("Recovering localized resource " + request + " at "
+          + completed.getLocalPath());
+      Path recoveredPath = new Path(completed.getLocalPath());
+      tracker.handle(new ResourceRecoveredEvent(request, recoveredPath,
+          completed.getSize()));
+    }
+
+    for (Map.Entry<LocalResourceProto, Path> started :
+         state.getInProgressResources().entrySet()) {
+      LocalResourceRequest request = new LocalResourceRequest(
+          new LocalResourcePBImpl(started.getKey()));
+      Path partialPath = started.getValue();
+      tracker.handle(new ResourceRecoveredEvent(request, partialPath, 0));
+
+      // delete any in-progress localizations, containers will request again
+      LOG.info("Deleting in-progress localization for " + request + " at "
+          + partialPath);
+      tracker.remove(tracker.getLocalizedResource(request), delService);
+    }
+
+    // TODO: remove untracked directories in local filesystem
+  }
+
   @Override
   public LocalizerHeartbeatResponse heartbeat(LocalizerStatus status) {
     return localizerTracker.processHeartbeat(status);
@@ -337,17 +417,10 @@ public class ResourceLocalizationService extends CompositeService
     // 0) Create application tracking structs
     String userName = app.getUser();
     privateRsrc.putIfAbsent(userName, new LocalResourcesTrackerImpl(userName,
-      dispatcher, true, super.getConfig()));
-    if (null != appRsrc.putIfAbsent(
-      ConverterUtils.toString(app.getAppId()),
-      new LocalResourcesTrackerImpl(app.getUser(), dispatcher, false, super
-        .getConfig()))) {
-      LOG.warn("Initializing application " + app + " already present");
-      assert false; // TODO: FIXME assert doesn't help
-                    // ^ The condition is benign. Tests should fail and it
-                    // should appear in logs, but it's an internal error
-                    // that should have no effect on applications
-    }
+        null, dispatcher, true, super.getConfig(), stateStore));
+    String appIdStr = ConverterUtils.toString(app.getAppId());
+    appRsrc.putIfAbsent(appIdStr, new LocalResourcesTrackerImpl(app.getUser(),
+        app.getAppId(), dispatcher, false, super.getConfig(), stateStore));
     // 1) Signal container init
     //
     // This is handled by the ApplicationImpl state machine and allows
@@ -446,18 +519,28 @@ public class ResourceLocalizationService extends CompositeService
 
   @SuppressWarnings({"unchecked"})
   private void handleDestroyApplicationResources(Application application) {
-    String userName;
-    String appIDStr;
+    String userName = application.getUser();
+    ApplicationId appId = application.getAppId();
+    String appIDStr = application.toString();
     LocalResourcesTracker appLocalRsrcsTracker =
-      appRsrc.remove(ConverterUtils.toString(application.getAppId()));
-    if (null == appLocalRsrcsTracker) {
+      appRsrc.remove(ConverterUtils.toString(appId));
+    if (appLocalRsrcsTracker != null) {
+      for (LocalizedResource rsrc : appLocalRsrcsTracker ) {
+        Path localPath = rsrc.getLocalPath();
+        if (localPath != null) {
+          try {
+            stateStore.removeLocalizedResource(userName, appId, localPath);
+          } catch (IOException e) {
+            LOG.error("Unable to remove resource " + rsrc + " for " + appIDStr
+                + " from state store", e);
+          }
+        }
+      }
+    } else {
       LOG.warn("Removing uninitialized application " + application);
     }
-    // TODO: What to do with appLocalRsrcsTracker?
 
     // Delete the application directories
-    userName = application.getUser();
-    appIDStr = application.toString();
     for (String localDir : dirsHandler.getLocalDirs()) {
 
       // Delete the user-owned app-dir
@@ -668,19 +751,15 @@ public class ResourceLocalizationService extends CompositeService
         if (rsrc.getState().equals(ResourceState.DOWNLOADING)) {
           LocalResource resource = request.getResource().getRequest();
           try {
-            Path publicDirDestPath =
+            Path publicRootPath =
                 dirsHandler.getLocalPathForWrite("." + Path.SEPARATOR
                     + ContainerLocalizer.FILECACHE,
                   ContainerLocalizer.getEstimatedSize(resource), true);
-            Path hierarchicalPath =
-                publicRsrc.getPathForLocalization(key, publicDirDestPath);
-            if (!hierarchicalPath.equals(publicDirDestPath)) {
-              publicDirDestPath = hierarchicalPath;
+            Path publicDirDestPath =
+                publicRsrc.getPathForLocalization(key, publicRootPath);
+            if (!publicDirDestPath.getParent().equals(publicRootPath)) {
               DiskChecker.checkDir(new File(publicDirDestPath.toUri().getPath()));
             }
-            publicDirDestPath =
-                new Path(publicDirDestPath, Long.toString(publicRsrc
-                  .nextUniqueNumber()));
             // explicitly synchronize pending here to avoid future task
             // completing and being dequeued before pending updated
             synchronized (pending) {
@@ -968,9 +1047,8 @@ public class ResourceLocalizationService extends CompositeService
       Path dirPath =
           dirsHandler.getLocalPathForWrite(cacheDirectory,
             ContainerLocalizer.getEstimatedSize(rsrc), false);
-      dirPath = tracker.getPathForLocalization(new LocalResourceRequest(rsrc),
-        dirPath);
-      return new Path (dirPath, Long.toString(tracker.nextUniqueNumber()));
+      return tracker.getPathForLocalization(new LocalResourceRequest(rsrc),
+          dirPath);
     }
 
     @Override

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java

@@ -31,5 +31,7 @@ public enum ResourceEventType {
   /** See {@link ResourceReleaseEvent} */
   RELEASE,
   /** See {@link ResourceFailedLocalizationEvent} */
-  LOCALIZATION_FAILED
+  LOCALIZATION_FAILED,
+  /** See {@link ResourceRecoveredEvent} */
+  RECOVERED
 }

+ 43 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceRecoveredEvent.java

@@ -0,0 +1,43 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest;
+
+/**
+ * Resource event of type {@link ResourceEventType#RECOVERED}, carrying the
+ * local path and size recorded for a resource request.
+ */
+public class ResourceRecoveredEvent extends ResourceEvent {
+
+  /** Local filesystem path associated with the recovered resource. */
+  private final Path localPath;
+  /** Size recorded for the recovered resource. */
+  private final long size;
+
+  /**
+   * @param rsrc the resource request being recovered
+   * @param localPath local path associated with the resource
+   * @param size size recorded for the resource
+   */
+  public ResourceRecoveredEvent(LocalResourceRequest rsrc, Path localPath,
+      long size) {
+    super(rsrc, ResourceEventType.RECOVERED);
+    this.size = size;
+    this.localPath = localPath;
+  }
+
+  /** @return the local path supplied at construction */
+  public Path getLocalPath() {
+    return this.localPath;
+  }
+
+  /** @return the size supplied at construction */
+  public long getSize() {
+    return this.size;
+  }
+}

+ 377 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java

@@ -0,0 +1,377 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import static org.fusesource.leveldbjni.JniDBFactory.asString;
+import static org.fusesource.leveldbjni.JniDBFactory.bytes;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.fusesource.leveldbjni.internal.NativeDB;
+import org.iq80.leveldb.DB;
+import org.iq80.leveldb.DBException;
+import org.iq80.leveldb.Logger;
+import org.iq80.leveldb.Options;
+import org.iq80.leveldb.WriteBatch;
+
+public class NMLeveldbStateStoreService extends NMStateStoreService {
+
+  public static final Log LOG =
+      LogFactory.getLog(NMLeveldbStateStoreService.class);
+
+  private static final String DB_NAME = "yarn-nm-state";
+  private static final String DB_SCHEMA_VERSION_KEY = "schema-version";
+  private static final String DB_SCHEMA_VERSION = "1.0";
+
+  private static final String LOCALIZATION_KEY_PREFIX = "Localization/";
+  private static final String LOCALIZATION_PUBLIC_KEY_PREFIX =
+      LOCALIZATION_KEY_PREFIX + "public/";
+  private static final String LOCALIZATION_PRIVATE_KEY_PREFIX =
+      LOCALIZATION_KEY_PREFIX + "private/";
+  private static final String LOCALIZATION_STARTED_SUFFIX = "started/";
+  private static final String LOCALIZATION_COMPLETED_SUFFIX = "completed/";
+  private static final String LOCALIZATION_FILECACHE_SUFFIX = "filecache/";
+  private static final String LOCALIZATION_APPCACHE_SUFFIX = "appcache/";
+
+  private DB db;
+
+  public NMLeveldbStateStoreService() {
+    super(NMLeveldbStateStoreService.class.getName());
+  }
+
+  @Override
+  protected void startStorage() throws IOException {
+  }
+
+  /** Close the leveldb handle if one was opened. */
+  @Override
+  protected void closeStorage() throws IOException {
+    if (db == null) {
+      // nothing was opened, nothing to release
+      return;
+    }
+    db.close();
+  }
+
+
+  /**
+   * Load the state of all localized resources from the database.
+   * Keys under the public prefix populate the public tracker state; keys
+   * under the private prefix are grouped by the user name that follows the
+   * prefix and loaded per user.
+   *
+   * @return recovered localized resource state
+   * @throws IOException if a key is malformed, a value cannot be parsed,
+   *         the database reports an error, or the iterator fails to close
+   */
+  @Override
+  public RecoveredLocalizationState loadLocalizationState()
+      throws IOException {
+    RecoveredLocalizationState state = new RecoveredLocalizationState();
+
+    LeveldbIterator iter = null;
+    try {
+      iter = new LeveldbIterator(db);
+      iter.seek(bytes(LOCALIZATION_PUBLIC_KEY_PREFIX));
+      state.publicTrackerState = loadResourceTrackerState(iter,
+          LOCALIZATION_PUBLIC_KEY_PREFIX);
+
+      iter.seek(bytes(LOCALIZATION_PRIVATE_KEY_PREFIX));
+      while (iter.hasNext()) {
+        Entry<byte[],byte[]> entry = iter.peekNext();
+        String key = asString(entry.getKey());
+        if (!key.startsWith(LOCALIZATION_PRIVATE_KEY_PREFIX)) {
+          break;
+        }
+
+        // the user name is the path component right after the prefix
+        int userEndPos = key.indexOf('/',
+            LOCALIZATION_PRIVATE_KEY_PREFIX.length());
+        if (userEndPos < 0) {
+          throw new IOException("Unable to determine user in resource key: "
+              + key);
+        }
+        String user = key.substring(
+            LOCALIZATION_PRIVATE_KEY_PREFIX.length(), userEndPos);
+        state.userResources.put(user, loadUserLocalizedResources(iter,
+            key.substring(0, userEndPos+1)));
+      }
+    } catch (DBException e) {
+      throw new IOException(e.getMessage(), e);
+    } finally {
+      // always release the underlying iterator, including on failure;
+      // previously it was never closed
+      if (iter != null) {
+        iter.close();
+      }
+    }
+
+    return state;
+  }
+
+  /**
+   * Load the completed and in-progress resources stored under one tracker
+   * prefix. Consumes keys from the iterator until one no longer starts with
+   * {@code keyPrefix}.
+   *
+   * @throws IOException if a key under the prefix is neither a completed
+   *         nor a started resource key
+   */
+  private LocalResourceTrackerState loadResourceTrackerState(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    final String completedPrefix = keyPrefix + LOCALIZATION_COMPLETED_SUFFIX;
+    final String startedPrefix = keyPrefix + LOCALIZATION_STARTED_SUFFIX;
+    LocalResourceTrackerState trackerState = new LocalResourceTrackerState();
+    while (iter.hasNext()) {
+      String key = asString(iter.peekNext().getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      boolean isCompleted = key.startsWith(completedPrefix);
+      if (!isCompleted && !key.startsWith(startedPrefix)) {
+        throw new IOException("Unexpected key in resource tracker state: "
+            + key);
+      }
+
+      // the sub-loaders advance the iterator past their own section
+      if (isCompleted) {
+        trackerState.localizedResources = loadCompletedResources(iter,
+            completedPrefix);
+      } else {
+        trackerState.inProgressResources = loadStartedResources(iter,
+            startedPrefix);
+      }
+    }
+
+    return trackerState;
+  }
+
+  /**
+   * Parse consecutive entries whose keys start with {@code keyPrefix} as
+   * completed resources, advancing the iterator past each parsed entry.
+   */
+  private List<LocalizedResourceProto> loadCompletedResources(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    List<LocalizedResourceProto> completed =
+        new ArrayList<LocalizedResourceProto>();
+    // the iterator is only advanced after an entry was parsed successfully
+    for (; iter.hasNext(); iter.next()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Loading completed resource from " + key);
+      }
+      completed.add(LocalizedResourceProto.parseFrom(entry.getValue()));
+    }
+
+    return completed;
+  }
+
+  private Map<LocalResourceProto, Path> loadStartedResources(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    Map<LocalResourceProto, Path> rsrcs =
+        new HashMap<LocalResourceProto, Path>();
+    while (iter.hasNext()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      Path localPath = new Path(key.substring(keyPrefix.length()));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Loading in-progress resource at " + localPath);
+      }
+      rsrcs.put(LocalResourceProto.parseFrom(entry.getValue()), localPath);
+      iter.next();
+    }
+
+    return rsrcs;
+  }
+
+  private RecoveredUserResources loadUserLocalizedResources(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    RecoveredUserResources userResources = new RecoveredUserResources();
+    while (iter.hasNext()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      if (key.startsWith(LOCALIZATION_FILECACHE_SUFFIX, keyPrefix.length())) {
+        userResources.privateTrackerState = loadResourceTrackerState(iter,
+            keyPrefix + LOCALIZATION_FILECACHE_SUFFIX);
+      } else if (key.startsWith(LOCALIZATION_APPCACHE_SUFFIX,
+          keyPrefix.length())) {
+        int appIdStartPos = keyPrefix.length() +
+            LOCALIZATION_APPCACHE_SUFFIX.length();
+        int appIdEndPos = key.indexOf('/', appIdStartPos);
+        if (appIdEndPos < 0) {
+          throw new IOException("Unable to determine appID in resource key: "
+              + key);
+        }
+        ApplicationId appId = ConverterUtils.toApplicationId(
+            key.substring(appIdStartPos, appIdEndPos));
+        userResources.appTrackerStates.put(appId,
+            loadResourceTrackerState(iter, key.substring(0, appIdEndPos+1)));
+      } else {
+        throw new IOException("Unexpected user resource key " + key);
+      }
+    }
+    return userResources;
+  }
+
+  /**
+   * Store the resource request under the "started" key derived from the
+   * user, application and local path.
+   */
+  @Override
+  public void startResourceLocalization(String user, ApplicationId appId,
+      LocalResourceProto proto, Path localPath) throws IOException {
+    final String startedKey =
+        getResourceStartedKey(user, appId, localPath.toString());
+    try {
+      db.put(bytes(startedKey), proto.toByteArray());
+    } catch (DBException dbErr) {
+      throw new IOException(dbErr.getMessage(), dbErr);
+    }
+  }
+
+  /**
+   * Replace the "started" record of a resource with its "completed" record
+   * in a single write batch.
+   */
+  @Override
+  public void finishResourceLocalization(String user, ApplicationId appId,
+      LocalizedResourceProto proto) throws IOException {
+    final String pathStr = proto.getLocalPath();
+    final String keyToDelete = getResourceStartedKey(user, appId, pathStr);
+    final String keyToStore = getResourceCompletedKey(user, appId, pathStr);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Storing localized resource to " + keyToStore);
+    }
+    try {
+      WriteBatch updates = db.createWriteBatch();
+      try {
+        updates.delete(bytes(keyToDelete));
+        updates.put(bytes(keyToStore), proto.toByteArray());
+        db.write(updates);
+      } finally {
+        updates.close();
+      }
+    } catch (DBException dbErr) {
+      throw new IOException(dbErr.getMessage(), dbErr);
+    }
+  }
+
+  /**
+   * Delete both the "started" and the "completed" record for the given
+   * local path in a single write batch.
+   */
+  @Override
+  public void removeLocalizedResource(String user, ApplicationId appId,
+      Path localPath) throws IOException {
+    final String pathStr = localPath.toString();
+    final String startedKey = getResourceStartedKey(user, appId, pathStr);
+    final String completedKey =
+        getResourceCompletedKey(user, appId, pathStr);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Removing local resource at " + pathStr);
+    }
+    try {
+      WriteBatch deletions = db.createWriteBatch();
+      try {
+        deletions.delete(bytes(startedKey));
+        deletions.delete(bytes(completedKey));
+        db.write(deletions);
+      } finally {
+        deletions.close();
+      }
+    } catch (DBException dbErr) {
+      throw new IOException(dbErr.getMessage(), dbErr);
+    }
+  }
+
+  private String getResourceStartedKey(String user, ApplicationId appId,
+      String localPath) {
+    return getResourceTrackerKeyPrefix(user, appId)
+        + LOCALIZATION_STARTED_SUFFIX + localPath;
+  }
+
+  private String getResourceCompletedKey(String user, ApplicationId appId,
+      String localPath) {
+    return getResourceTrackerKeyPrefix(user, appId)
+        + LOCALIZATION_COMPLETED_SUFFIX + localPath;
+  }
+
+  private String getResourceTrackerKeyPrefix(String user,
+      ApplicationId appId) {
+    if (user == null) {
+      return LOCALIZATION_PUBLIC_KEY_PREFIX;
+    }
+    if (appId == null) {
+      return LOCALIZATION_PRIVATE_KEY_PREFIX + user + "/"
+          + LOCALIZATION_FILECACHE_SUFFIX;
+    }
+    return LOCALIZATION_PRIVATE_KEY_PREFIX + user + "/"
+        + LOCALIZATION_APPCACHE_SUFFIX + appId + "/";
+  }
+
+
+  /**
+   * Open the state database, creating it (and stamping the schema version)
+   * when it does not exist yet, then verify the stored schema version.
+   *
+   * @throws IOException if the database cannot be opened or created, or if
+   *         the schema version is missing or does not match exactly
+   */
+  @Override
+  protected void initStorage(Configuration conf)
+      throws IOException {
+    Path storeRoot = createStorageDir(conf);
+    Options options = new Options();
+    options.createIfMissing(false);
+    options.logger(new LeveldbLogger());
+    LOG.info("Using state database at " + storeRoot + " for recovery");
+    File dbfile = new File(storeRoot.toString());
+    byte[] storedVersion = null;
+    try {
+      // first attempt: open an existing database only
+      db = JniDBFactory.factory.open(dbfile, options);
+      try {
+        storedVersion = db.get(bytes(DB_SCHEMA_VERSION_KEY));
+      } catch (DBException readErr) {
+        throw new IOException(readErr.getMessage(), readErr);
+      }
+    } catch (NativeDB.DBException openErr) {
+      boolean missing = openErr.isNotFound()
+          || openErr.getMessage().contains(" does not exist ");
+      if (!missing) {
+        throw openErr;
+      }
+      // second attempt: create a fresh database and record its version
+      LOG.info("Creating state database at " + dbfile);
+      options.createIfMissing(true);
+      try {
+        db = JniDBFactory.factory.open(dbfile, options);
+        storedVersion = bytes(DB_SCHEMA_VERSION);
+        db.put(bytes(DB_SCHEMA_VERSION_KEY), storedVersion);
+      } catch (DBException dbErr) {
+        throw new IOException(dbErr.getMessage(), dbErr);
+      }
+    }
+
+    if (storedVersion == null) {
+      throw new IOException("State database schema version not found");
+    }
+    String schemaVersion = asString(storedVersion);
+    // only support exact schema matches for now
+    if (!DB_SCHEMA_VERSION.equals(schemaVersion)) {
+      throw new IOException("Incompatible state database schema, found "
+          + schemaVersion + " expected " + DB_SCHEMA_VERSION);
+    }
+  }
+
+  /**
+   * Resolve the database directory below the configured recovery directory
+   * and create it on the local filesystem with mode 0700.
+   *
+   * @throws IOException if no recovery directory is configured
+   */
+  private Path createStorageDir(Configuration conf) throws IOException {
+    final String recoveryDir = conf.get(YarnConfiguration.NM_RECOVERY_DIR);
+    if (recoveryDir == null) {
+      throw new IOException("No store location directory configured in " +
+          YarnConfiguration.NM_RECOVERY_DIR);
+    }
+
+    Path dbRoot = new Path(recoveryDir, DB_NAME);
+    FileSystem localFs = FileSystem.getLocal(conf);
+    FsPermission ownerOnly = new FsPermission((short)0700);
+    localFs.mkdirs(dbRoot, ownerOnly);
+    return dbRoot;
+  }
+
+
+  /** Forwards leveldb log messages to commons-logging at INFO level. */
+  private static class LeveldbLogger implements Logger {
+    private static final Log LOG = LogFactory.getLog(LeveldbLogger.class);
+
+    /** Log a single leveldb message at INFO level. */
+    @Override
+    public void log(String message) {
+      LeveldbLogger.LOG.info(message);
+    }
+  }
+}

+ 74 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+
+/**
+ * The state store to use when state isn't being stored. All write
+ * operations are no-ops and loading state is unsupported.
+ */
+public class NMNullStateStoreService extends NMStateStoreService {
+
+  public NMNullStateStoreService() {
+    super(NMNullStateStoreService.class.getName());
+  }
+
+  /** Nothing is persisted, so nothing can be recovered. */
+  @Override
+  public boolean canRecover() {
+    return false;
+  }
+
+  /** Always fails: this store never holds any state to load. */
+  @Override
+  public RecoveredLocalizationState loadLocalizationState()
+      throws IOException {
+    throw new UnsupportedOperationException(
+        "Recovery not supported by this state store");
+  }
+
+  @Override
+  public void startResourceLocalization(String user, ApplicationId appId,
+      LocalResourceProto proto, Path localPath) throws IOException {
+    // intentionally a no-op
+  }
+
+  @Override
+  public void finishResourceLocalization(String user, ApplicationId appId,
+      LocalizedResourceProto proto) throws IOException {
+    // intentionally a no-op
+  }
+
+  @Override
+  public void removeLocalizedResource(String user, ApplicationId appId,
+      Path localPath) throws IOException {
+    // intentionally a no-op
+  }
+
+  @Override
+  protected void initStorage(Configuration conf) throws IOException {
+    // no backing storage to initialize
+  }
+
+  @Override
+  protected void startStorage() throws IOException {
+    // no backing storage to start
+  }
+
+  @Override
+  protected void closeStorage() throws IOException {
+    // no backing storage to close
+  }
+}

+ 163 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java

@@ -0,0 +1,163 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+
+/**
+ * Base class for services that persist NodeManager state so that it can be
+ * reloaded later. The service lifecycle methods delegate to
+ * {@link #initStorage}, {@link #startStorage} and {@link #closeStorage}.
+ */
+@Private
+@Unstable
+public abstract class NMStateStoreService extends AbstractService {
+
+  public NMStateStoreService(String name) {
+    super(name);
+  }
+
+  /** Completed and in-progress resources recorded for a single tracker. */
+  public static class LocalResourceTrackerState {
+    List<LocalizedResourceProto> localizedResources =
+        new ArrayList<LocalizedResourceProto>();
+    Map<LocalResourceProto, Path> inProgressResources =
+        new HashMap<LocalResourceProto, Path>();
+
+    /** @return resources whose localization completed */
+    public List<LocalizedResourceProto> getLocalizedResources() {
+      return localizedResources;
+    }
+
+    /** @return resource requests still in progress, mapped to local path */
+    public Map<LocalResourceProto, Path> getInProgressResources() {
+      return inProgressResources;
+    }
+
+    /** @return true if neither completed nor in-progress resources exist */
+    public boolean isEmpty() {
+      return localizedResources.isEmpty() && inProgressResources.isEmpty();
+    }
+  }
+
+  /** Private and per-application tracker states recorded for one user. */
+  public static class RecoveredUserResources {
+    LocalResourceTrackerState privateTrackerState =
+        new LocalResourceTrackerState();
+    Map<ApplicationId, LocalResourceTrackerState> appTrackerStates =
+        new HashMap<ApplicationId, LocalResourceTrackerState>();
+
+    /** @return the user's private tracker state */
+    public LocalResourceTrackerState getPrivateTrackerState() {
+      return privateTrackerState;
+    }
+
+    /** @return tracker states keyed by application ID */
+    public Map<ApplicationId, LocalResourceTrackerState>
+    getAppTrackerStates() {
+      return appTrackerStates;
+    }
+  }
+
+  /** Public tracker state plus the per-user resources that were stored. */
+  public static class RecoveredLocalizationState {
+    LocalResourceTrackerState publicTrackerState =
+        new LocalResourceTrackerState();
+    Map<String, RecoveredUserResources> userResources =
+        new HashMap<String, RecoveredUserResources>();
+
+    /** @return the public tracker state */
+    public LocalResourceTrackerState getPublicTrackerState() {
+      return publicTrackerState;
+    }
+
+    /** @return recovered resources keyed by user name */
+    public Map<String, RecoveredUserResources> getUserResources() {
+      return userResources;
+    }
+  }
+
+  /** Initialize the state storage */
+  @Override
+  public void serviceInit(Configuration conf) throws IOException {
+    initStorage(conf);
+  }
+
+  /** Start the state storage for use */
+  @Override
+  public void serviceStart() throws IOException {
+    startStorage();
+  }
+
+  /** Shutdown the state storage. */
+  @Override
+  public void serviceStop() throws IOException {
+    closeStorage();
+  }
+
+  /**
+   * Whether this store supports recovery. Returns true by default;
+   * subclasses that do not persist state override this to return false.
+   * @return true if state can be recovered from this store
+   */
+  public boolean canRecover() {
+    return true;
+  }
+
+
+  /**
+   * Load the state of localized resources
+   * @return recovered localized resource state
+   * @throws IOException if the state cannot be loaded
+   */
+  public abstract RecoveredLocalizationState loadLocalizationState()
+      throws IOException;
+
+  /**
+   * Record the start of localization for a resource
+   * @param user the username or null if the resource is public
+   * @param appId the application ID if the resource is app-specific or null
+   * @param proto the resource request
+   * @param localPath local filesystem path where the resource will be stored
+   * @throws IOException if the record cannot be stored
+   */
+  public abstract void startResourceLocalization(String user,
+      ApplicationId appId, LocalResourceProto proto, Path localPath)
+          throws IOException;
+
+  /**
+   * Record the completion of a resource localization
+   * @param user the username or null if the resource is public
+   * @param appId the application ID if the resource is app-specific or null
+   * @param proto the serialized localized resource
+   * @throws IOException if the record cannot be stored
+   */
+  public abstract void finishResourceLocalization(String user,
+      ApplicationId appId, LocalizedResourceProto proto) throws IOException;
+
+  /**
+   * Remove records related to a resource localization
+   * @param user the username or null if the resource is public
+   * @param appId the application ID if the resource is app-specific or null
+   * @param localPath local filesystem path of the resource whose records
+   *        are being removed
+   * @throws IOException if the records cannot be removed
+   */
+  public abstract void removeLocalizedResource(String user,
+      ApplicationId appId, Path localPath) throws IOException;
+
+
+  /** Prepare the backing storage; called from {@link #serviceInit}. */
+  protected abstract void initStorage(Configuration conf) throws IOException;
+
+  /** Start the backing storage; called from {@link #serviceStart}. */
+  protected abstract void startStorage() throws IOException;
+
+  /** Release the backing storage; called from {@link #serviceStop}. */
+  protected abstract void closeStorage() throws IOException;
+}

+ 31 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto

@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.hadoop.yarn.proto";
+option java_outer_classname = "YarnServerNodemanagerRecoveryProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+package hadoop.yarn;
+
+import "yarn_protos.proto";
+
+message LocalizedResourceProto {  // passed to NMStateStoreService.finishResourceLocalization
+  optional LocalResourceProto resource = 1;  // the resource request that was localized
+  optional string localPath = 2;  // local path of the localized resource
+  optional int64 size = 3;  // presumably the localized size; units not shown here, confirm
+}

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 
 public class DummyContainerManager extends ContainerManagerImpl {
@@ -75,7 +76,7 @@ public class DummyContainerManager extends ContainerManagerImpl {
   protected ResourceLocalizationService createResourceLocalizationService(
       ContainerExecutor exec, DeletionService deletionContext) {
     return new ResourceLocalizationService(super.dispatcher, exec,
-        deletionContext, super.dirsHandler) {
+        deletionContext, super.dirsHandler, new NMNullStateStoreService()) {
       @Override
       public void handle(LocalizationEvent event) {
         switch (event.getType()) {

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java

@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
@@ -79,7 +80,8 @@ public class TestEventFlow {
     YarnConfiguration conf = new YarnConfiguration();
     
     Context context = new NMContext(new NMContainerTokenSecretManager(conf),
-        new NMTokenSecretManagerInNM(), null, null) {
+        new NMTokenSecretManagerInNM(), null, null,
+        new NMNullStateStoreService()) {
       @Override
       public int getHttpPort() {
         return 1234;

+ 30 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java

@@ -108,6 +108,36 @@ public class TestNodeManagerShutdown {
     localFS.delete(new Path(basedir.getPath()), true);
   }
   
+  @Test // recovery dir must survive a normal stop and be gone after a decommissioned stop
+  public void testStateStoreRemovalOnDecommission() throws IOException {
+    final File recoveryDir = new File(basedir, "nm-recovery");
+    nm = new TestNodeManager();
+    YarnConfiguration conf = createNMConfig();
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf.set(YarnConfiguration.NM_RECOVERY_DIR, recoveryDir.getAbsolutePath());
+
+    // verify state store is not removed on normal shutdown (decommissioned flag never set)
+    nm.init(conf);
+    nm.start();
+    Assert.assertTrue(recoveryDir.exists());
+    Assert.assertTrue(recoveryDir.isDirectory());
+    nm.stop();
+    nm = null; // presumably keeps the test teardown from touching the stopped NM; confirm
+    Assert.assertTrue(recoveryDir.exists());
+    Assert.assertTrue(recoveryDir.isDirectory());
+
+    // verify state store is removed on decommissioned shutdown (same config, fresh NM)
+    nm = new TestNodeManager();
+    nm.init(conf);
+    nm.start();
+    Assert.assertTrue(recoveryDir.exists());
+    Assert.assertTrue(recoveryDir.isDirectory());
+    nm.getNMContext().setDecommissioned(true); // flag the node before stopping it
+    nm.stop();
+    nm = null;
+    Assert.assertFalse(recoveryDir.exists());
+  }
+
   @Test
   public void testKillContainersOnShutdown() throws IOException,
       YarnException {

+ 6 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java

@@ -91,6 +91,8 @@ import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 
 @SuppressWarnings("rawtypes")
 public class TestNodeStatusUpdater {
@@ -1159,7 +1161,8 @@ public class TestNodeStatusUpdater {
       @Override
       protected NMContext createNMContext(
           NMContainerTokenSecretManager containerTokenSecretManager,
-          NMTokenSecretManagerInNM nmTokenSecretManager) {
+          NMTokenSecretManagerInNM nmTokenSecretManager,
+          NMStateStoreService store) {
         return new MyNMContext(containerTokenSecretManager,
           nmTokenSecretManager);
       }
@@ -1268,7 +1271,8 @@ public class TestNodeStatusUpdater {
     public MyNMContext(
         NMContainerTokenSecretManager containerTokenSecretManager,
         NMTokenSecretManagerInNM nmTokenSecretManager) {
-      super(containerTokenSecretManager, nmTokenSecretManager, null, null);
+      super(containerTokenSecretManager, nmTokenSecretManager, null, null,
+          new NMNullStateStoreService());
     }
 
     @Override

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java

@@ -64,6 +64,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
@@ -103,7 +104,8 @@ public abstract class BaseContainerManagerTest {
   protected static final int HTTP_PORT = 5412;
   protected Configuration conf = new YarnConfiguration();
   protected Context context = new NMContext(new NMContainerTokenSecretManager(
-    conf), new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf)) {
+    conf), new NMTokenSecretManagerInNM(), null,
+    new ApplicationACLsManager(conf), new NMNullStateStoreService()) {
     public int getHttpPort() {
       return HTTP_PORT;
     };

+ 47 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java

@@ -23,6 +23,7 @@ import org.junit.Assert;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalCacheDirectoryManager.Directory;
 import org.junit.Test;
 
 public class TestLocalCacheDirectoryManager {
@@ -73,7 +74,7 @@ public class TestLocalCacheDirectoryManager {
     conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "1");
     Exception e = null;
     ResourceLocalizationService service =
-        new ResourceLocalizationService(null, null, null, null);
+        new ResourceLocalizationService(null, null, null, null, null);
     try {
       service.init(conf);
     } catch (Exception e1) {
@@ -109,4 +110,49 @@ public class TestLocalCacheDirectoryManager {
     // first sub directory
     Assert.assertEquals(firstSubDir, dir.getRelativePathForLocalization());
   }
+
+  @Test // directory numbers 0..9999 must round-trip through their relative path form
+  public void testDirectoryConversion() {
+    for (int i = 0; i < 10000; ++i) {
+      String path = Directory.getRelativePath(i);
+      Assert.assertEquals("Incorrect conversion for " + i, i,
+          Directory.getDirectoryNumber(path)); // parsing the path must give back i
+    }
+  }
+
+  @Test // files counted via incrementFileCountForPath must influence later path assignment
+  public void testIncrementFileCountForPath() {
+    YarnConfiguration conf = new YarnConfiguration();
+    conf.setInt(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY,
+        LocalCacheDirectoryManager.DIRECTORIES_PER_LEVEL + 2);
+    LocalCacheDirectoryManager mgr = new LocalCacheDirectoryManager(conf);
+    final String rootPath = "";
+    mgr.incrementFileCountForPath(rootPath); // count one file in the root up front
+    Assert.assertEquals(rootPath, mgr.getRelativePathForLocalization());
+    Assert.assertFalse("root dir should be full",
+        rootPath.equals(mgr.getRelativePathForLocalization())); // next path goes elsewhere
+    // finish filling the other directory
+    mgr.getRelativePathForLocalization();
+    // free up space in the root dir (two decrements, so two root slots are handed out below)
+    mgr.decrementFileCountForPath(rootPath);
+    mgr.decrementFileCountForPath(rootPath);
+    Assert.assertEquals(rootPath, mgr.getRelativePathForLocalization());
+    Assert.assertEquals(rootPath, mgr.getRelativePathForLocalization());
+    String otherDir = mgr.getRelativePathForLocalization();
+    Assert.assertFalse("root dir should be full", otherDir.equals(rootPath));
+
+    final String deepDir0 = "d/e/e/p/0";
+    final String deepDir1 = "d/e/e/p/1";
+    final String deepDir2 = "d/e/e/p/2";
+    final String deepDir3 = "d/e/e/p/3";
+    mgr.incrementFileCountForPath(deepDir0); // count a file in a deep directory
+    Assert.assertEquals(otherDir, mgr.getRelativePathForLocalization());
+    Assert.assertEquals(deepDir0, mgr.getRelativePathForLocalization());
+    Assert.assertEquals("total dir count incorrect after increment",
+        deepDir1, mgr.getRelativePathForLocalization());
+    mgr.incrementFileCountForPath(deepDir2);
+    mgr.incrementFileCountForPath(deepDir1);
+    mgr.incrementFileCountForPath(deepDir2);
+    Assert.assertEquals(deepDir3, mgr.getRelativePathForLocalization()); // deepDir1 and deepDir2 are skipped
+  }
 }

+ 310 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
 import static org.mockito.Mockito.any;
 import static org.mockito.Matchers.isA;
+import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -34,13 +35,17 @@ import org.junit.Assert;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
@@ -52,10 +57,14 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.junit.Test;
+import org.mockito.ArgumentCaptor;
 
 public class TestLocalResourcesTrackerImpl {
 
@@ -92,8 +101,8 @@ public class TestLocalResourcesTrackerImpl {
       localrsrc.put(req1, lr1);
       localrsrc.put(req2, lr2);
       LocalResourcesTracker tracker =
-          new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, false,
-            conf);
+          new LocalResourcesTrackerImpl(user, null, dispatcher, localrsrc,
+              false, conf, new NMNullStateStoreService());
 
       ResourceEvent req11Event =
           new ResourceRequestEvent(req1, LocalResourceVisibility.PUBLIC, lc1);
@@ -176,7 +185,8 @@ public class TestLocalResourcesTrackerImpl {
       ConcurrentMap<LocalResourceRequest, LocalizedResource> localrsrc = new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>();
       localrsrc.put(req1, lr1);
       LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
-          dispatcher, localrsrc, false, conf);
+          null, dispatcher, localrsrc, false, conf,
+          new NMNullStateStoreService());
 
       ResourceEvent req11Event = new ResourceRequestEvent(req1,
           LocalResourceVisibility.PUBLIC, lc1);
@@ -246,7 +256,8 @@ public class TestLocalResourcesTrackerImpl {
       ConcurrentMap<LocalResourceRequest, LocalizedResource> localrsrc =
           new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>();
       LocalResourcesTracker tracker =
-          new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, true, conf);
+          new LocalResourcesTrackerImpl(user, null, dispatcher, localrsrc,
+              true, conf, new NMNullStateStoreService());
 
       LocalResourceRequest lr =
           createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.PUBLIC);
@@ -264,6 +275,7 @@ public class TestLocalResourcesTrackerImpl {
 
       // Container-1 requesting local resource.
       tracker.handle(reqEvent1);
+      dispatcher.await();
 
       // New localized Resource should have been added to local resource map
       // and the requesting container will be added to its waiting queue.
@@ -280,6 +292,7 @@ public class TestLocalResourcesTrackerImpl {
       ResourceEvent reqEvent2 =
           new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc2);
       tracker.handle(reqEvent2);
+      dispatcher.await();
 
       // Container 2 should have been added to the waiting queue of the local
       // resource
@@ -295,6 +308,7 @@ public class TestLocalResourcesTrackerImpl {
       LocalizedResource localizedResource = localrsrc.get(lr);
       
       tracker.handle(resourceFailedEvent);
+      dispatcher.await();
 
       // After receiving failed resource event; all waiting containers will be
       // notified with Container Resource Failed Event.
@@ -308,6 +322,7 @@ public class TestLocalResourcesTrackerImpl {
       // exception.
       ResourceReleaseEvent relEvent1 = new ResourceReleaseEvent(lr, cId1);
       tracker.handle(relEvent1);
+      dispatcher.await();
 
       // Container-3 now requests for the same resource. This request call
       // is coming prior to Container-2's release call.
@@ -316,6 +331,7 @@ public class TestLocalResourcesTrackerImpl {
       ResourceEvent reqEvent3 =
           new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc3);
       tracker.handle(reqEvent3);
+      dispatcher.await();
 
       // Local resource cache now should have the requested resource and the
       // number of waiting containers should be 1.
@@ -327,6 +343,7 @@ public class TestLocalResourcesTrackerImpl {
       // Container-2 Releases the resource
       ResourceReleaseEvent relEvent2 = new ResourceReleaseEvent(lr, cId2);
       tracker.handle(relEvent2);
+      dispatcher.await();
 
       // Making sure that there is no change in the cache after the release.
       Assert.assertEquals(1, localrsrc.size());
@@ -340,6 +357,7 @@ public class TestLocalResourcesTrackerImpl {
       ResourceLocalizedEvent localizedEvent =
           new ResourceLocalizedEvent(lr, localizedPath, 123L);
       tracker.handle(localizedEvent);
+      dispatcher.await();
       
       // Verifying ContainerResourceLocalizedEvent .
       verify(containerEventHandler, times(1)).handle(
@@ -351,6 +369,7 @@ public class TestLocalResourcesTrackerImpl {
       // Container-3 releasing the resource.
       ResourceReleaseEvent relEvent3 = new ResourceReleaseEvent(lr, cId3);
       tracker.handle(relEvent3);
+      dispatcher.await();
       
       Assert.assertEquals(0, localrsrc.get(lr).getRefCount());
       
@@ -384,7 +403,8 @@ public class TestLocalResourcesTrackerImpl {
       ConcurrentMap<LocalResourceRequest, LocalizedResource> localrsrc =
           new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>();
       LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
-          dispatcher, localrsrc, true, conf);
+          null, dispatcher, localrsrc, true, conf,
+          new NMNullStateStoreService());
 
       // This is a random path. NO File creation will take place at this place.
       Path localDir = new Path("/tmp");
@@ -401,7 +421,9 @@ public class TestLocalResourcesTrackerImpl {
       tracker.handle(reqEvent1);
 
       // Simulate the process of localization of lr1
-      Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir);
+      // NOTE: Localization path from tracker has resource ID at end
+      Path hierarchicalPath1 =
+          tracker.getPathForLocalization(lr1, localDir).getParent();
       // Simulate lr1 getting localized
       ResourceLocalizedEvent rle1 =
           new ResourceLocalizedEvent(lr1,
@@ -417,7 +439,8 @@ public class TestLocalResourcesTrackerImpl {
           new ResourceRequestEvent(lr2, LocalResourceVisibility.PUBLIC, lc1);
       tracker.handle(reqEvent2);
 
-      Path hierarchicalPath2 = tracker.getPathForLocalization(lr2, localDir);
+      Path hierarchicalPath2 =
+          tracker.getPathForLocalization(lr2, localDir).getParent();
       // localization failed.
       ResourceFailedLocalizationEvent rfe2 =
           new ResourceFailedLocalizationEvent(
@@ -435,7 +458,8 @@ public class TestLocalResourcesTrackerImpl {
       ResourceEvent reqEvent3 = new ResourceRequestEvent(lr3,
           LocalResourceVisibility.PUBLIC, lc1);
       tracker.handle(reqEvent3);
-      Path hierarchicalPath3 = tracker.getPathForLocalization(lr3, localDir);
+      Path hierarchicalPath3 =
+          tracker.getPathForLocalization(lr3, localDir).getParent();
       // localization successful
       ResourceLocalizedEvent rle3 =
           new ResourceLocalizedEvent(lr3, new Path(hierarchicalPath3.toUri()
@@ -479,6 +503,284 @@ public class TestLocalResourcesTrackerImpl {
     }
   }
 
+  @Test // state store must receive start, finish and remove records for a localized resource
+  @SuppressWarnings("unchecked")
+  public void testStateStoreSuccessfulLocalization() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // Arbitrary path; this test does not create any file under it.
+    final Path localDir = new Path("/tmp");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    DeletionService mockDelService = mock(DeletionService.class);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class); // mocked so calls can be verified
+
+    try {
+      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, false, conf, stateStore);
+      // Container 1 needs lr1 resource
+      ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.APPLICATION);
+      LocalizerContext lc1 = new LocalizerContext(user, cId1, null);
+
+      // Container 1 requests lr1 to be localized
+      ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1,
+          LocalResourceVisibility.APPLICATION, lc1);
+      tracker.handle(reqEvent1);
+      dispatcher.await();
+
+      // Simulate the process of localization of lr1
+      Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir);
+
+      ArgumentCaptor<LocalResourceProto> localResourceCaptor =
+          ArgumentCaptor.forClass(LocalResourceProto.class);
+      ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
+      verify(stateStore).startResourceLocalization(eq(user), eq(appId),
+          localResourceCaptor.capture(), pathCaptor.capture());
+      LocalResourceProto lrProto = localResourceCaptor.getValue();
+      Path localizedPath1 = pathCaptor.getValue();
+      Assert.assertEquals(lr1,
+          new LocalResourceRequest(new LocalResourcePBImpl(lrProto)));
+      Assert.assertEquals(hierarchicalPath1, localizedPath1.getParent()); // recorded path is a child of the tracker's path
+
+      // Simulate lr1 getting localized
+      ResourceLocalizedEvent rle1 =
+          new ResourceLocalizedEvent(lr1, pathCaptor.getValue(), 120);
+      tracker.handle(rle1);
+      dispatcher.await();
+
+      ArgumentCaptor<LocalizedResourceProto> localizedProtoCaptor =
+          ArgumentCaptor.forClass(LocalizedResourceProto.class);
+      verify(stateStore).finishResourceLocalization(eq(user), eq(appId),
+          localizedProtoCaptor.capture());
+      LocalizedResourceProto localizedProto = localizedProtoCaptor.getValue();
+      Assert.assertEquals(lr1, new LocalResourceRequest(
+          new LocalResourcePBImpl(localizedProto.getResource())));
+      Assert.assertEquals(localizedPath1.toString(),
+          localizedProto.getLocalPath()); // finish record carries the path recorded at start
+      LocalizedResource localizedRsrc1 = tracker.getLocalizedResource(lr1);
+      Assert.assertNotNull(localizedRsrc1);
+
+      // simulate release and retention processing
+      tracker.handle(new ResourceReleaseEvent(lr1, cId1));
+      dispatcher.await();
+      boolean removeResult = tracker.remove(localizedRsrc1, mockDelService);
+
+      Assert.assertTrue(removeResult);
+      verify(stateStore).removeLocalizedResource(eq(user), eq(appId),
+          eq(localizedPath1)); // removal is keyed by that same path
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
+  @Test // a failed localization must remove the record that was written at start
+  @SuppressWarnings("unchecked")
+  public void testStateStoreFailedLocalization() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // Arbitrary path; this test does not create any file under it.
+    final Path localDir = new Path("/tmp");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class); // mocked so calls can be verified
+
+    try {
+      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, false, conf, stateStore);
+      // Container 1 needs lr1 resource
+      ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.APPLICATION);
+      LocalizerContext lc1 = new LocalizerContext(user, cId1, null);
+
+      // Container 1 requests lr1 to be localized
+      ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1,
+          LocalResourceVisibility.APPLICATION, lc1);
+      tracker.handle(reqEvent1);
+      dispatcher.await();
+
+      // Simulate the process of localization of lr1
+      Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir);
+
+      ArgumentCaptor<LocalResourceProto> localResourceCaptor =
+          ArgumentCaptor.forClass(LocalResourceProto.class);
+      ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
+      verify(stateStore).startResourceLocalization(eq(user), eq(appId),
+          localResourceCaptor.capture(), pathCaptor.capture());
+      LocalResourceProto lrProto = localResourceCaptor.getValue();
+      Path localizedPath1 = pathCaptor.getValue();
+      Assert.assertEquals(lr1,
+          new LocalResourceRequest(new LocalResourcePBImpl(lrProto)));
+      Assert.assertEquals(hierarchicalPath1, localizedPath1.getParent()); // recorded path is a child of the tracker's path
+
+      ResourceFailedLocalizationEvent rfe1 =
+          new ResourceFailedLocalizationEvent(
+              lr1, new Exception("Test").toString());
+      tracker.handle(rfe1); // report the localization as failed
+      dispatcher.await();
+      verify(stateStore).removeLocalizedResource(eq(user), eq(appId),
+          eq(localizedPath1)); // the start record must be cleaned up
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
+  @Test // a recovered resource is tracked, and new local IDs continue after the recovered one
+  @SuppressWarnings("unchecked")
+  public void testRecoveredResource() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // Arbitrary path; this test does not create any file under it.
+    final Path localDir = new Path("/tmp/localdir");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class);
+
+    try {
+      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, false, conf, stateStore);
+      // Container 1 needs lr1 resource
+      ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.APPLICATION);
+      Assert.assertNull(tracker.getLocalizedResource(lr1)); // unknown before recovery
+      final long localizedId1 = 52;
+      Path hierarchicalPath1 = new Path(localDir,
+          Long.toString(localizedId1));
+      Path localizedPath1 = new Path(hierarchicalPath1, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr1, localizedPath1, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr1)); // tracked once recovered
+
+      // verify new paths reflect recovery of previous resources
+      LocalResourceRequest lr2 = createLocalResourceRequest(user, 2, 2,
+          LocalResourceVisibility.APPLICATION);
+      LocalizerContext lc2 = new LocalizerContext(user, cId1, null);
+      ResourceEvent reqEvent2 = new ResourceRequestEvent(lr2,
+          LocalResourceVisibility.APPLICATION, lc2);
+      tracker.handle(reqEvent2);
+      dispatcher.await();
+      Path hierarchicalPath2 = tracker.getPathForLocalization(lr2, localDir);
+      long localizedId2 = Long.parseLong(hierarchicalPath2.getName()); // last path component is the numeric ID
+      Assert.assertEquals(localizedId1 + 1, localizedId2);
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
+  @Test // recovered paths must be counted against the matching cache sub-directory
+  @SuppressWarnings("unchecked")
+  public void testRecoveredResourceWithDirCacheMgr() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // Arbitrary path; this test does not create any file under it.
+    final Path localDirRoot = new Path("/tmp/localdir");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class);
+
+    try {
+      LocalResourcesTrackerImpl tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, true, conf, stateStore); // "true" presumably enables the cache directory manager; confirm
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr1));
+      final long localizedId1 = 52;
+      Path hierarchicalPath1 = new Path(localDirRoot + "/4/2",
+          Long.toString(localizedId1));
+      Path localizedPath1 = new Path(hierarchicalPath1, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr1, localizedPath1, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr1));
+      LocalCacheDirectoryManager dirMgrRoot =
+          tracker.getDirectoryManager(localDirRoot);
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4/2").getCount()); // first resource recovered under 4/2
+
+      LocalResourceRequest lr2 = createLocalResourceRequest(user, 2, 2,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr2));
+      final long localizedId2 = localizedId1 + 1;
+      Path hierarchicalPath2 = new Path(localDirRoot + "/4/2",
+          Long.toString(localizedId2));
+      Path localizedPath2 = new Path(hierarchicalPath2, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr2, localizedPath2, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr2));
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(2, dirMgrRoot.getDirectory("4/2").getCount()); // second resource in the same sub-directory
+
+      LocalResourceRequest lr3 = createLocalResourceRequest(user, 3, 3,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr3));
+      final long localizedId3 = 128;
+      Path hierarchicalPath3 = new Path(localDirRoot + "/4/3",
+          Long.toString(localizedId3));
+      Path localizedPath3 = new Path(hierarchicalPath3, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr3, localizedPath3, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr3));
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(2, dirMgrRoot.getDirectory("4/2").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4/3").getCount()); // sibling sub-directory counted separately
+
+      LocalResourceRequest lr4 = createLocalResourceRequest(user, 4, 4,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr4));
+      final long localizedId4 = 256;
+      Path hierarchicalPath4 = new Path(localDirRoot + "/4",
+          Long.toString(localizedId4));
+      Path localizedPath4 = new Path(hierarchicalPath4, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr4, localizedPath4, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr4));
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4").getCount()); // parent directory gets its own count
+      Assert.assertEquals(2, dirMgrRoot.getDirectory("4/2").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4/3").getCount());
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
   private boolean createdummylocalizefile(Path path) {
     boolean ret = false;
     File file = new File(path.toUri().getRawPath().toString());

+ 270 - 14
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java

@@ -19,6 +19,8 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.anyBoolean;
 import static org.mockito.Matchers.anyInt;
@@ -120,6 +122,10 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.junit.After;
@@ -188,7 +194,8 @@ public class TestResourceLocalizationService {
 
     ResourceLocalizationService locService =
       spy(new ResourceLocalizationService(dispatcher, exec, delService,
-                                          diskhandler));
+                                          diskhandler,
+                                          new NMNullStateStoreService()));
     doReturn(lfs)
       .when(locService).getLocalFileContext(isA(Configuration.class));
     try {
@@ -253,7 +260,8 @@ public class TestResourceLocalizationService {
 
     ResourceLocalizationService rawService =
       new ResourceLocalizationService(dispatcher, exec, delService,
-                                      dirsHandler);
+                                      dirsHandler,
+                                      new NMNullStateStoreService());
     ResourceLocalizationService spyService = spy(rawService);
     doReturn(mockServer).when(spyService).createServer();
     doReturn(mockLocallilzerTracker).when(spyService).createLocalizerTracker(
@@ -287,7 +295,7 @@ public class TestResourceLocalizationService {
               user, appId);
 
       // init container.
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, user);
       
       // init resources
       Random r = new Random();
@@ -402,6 +410,233 @@ public class TestResourceLocalizationService {
     }
   }
   
+  @Test // localizes resources, rebuilds the service on the same state store, then checks what was recovered
+  @SuppressWarnings("unchecked") // mocked generics
+  public void testRecovery() throws Exception {
+    final String user1 = "user1";
+    final String user2 = "user2";
+    final ApplicationId appId1 = ApplicationId.newInstance(1, 1);
+    final ApplicationId appId2 = ApplicationId.newInstance(1, 2);
+
+    List<Path> localDirs = new ArrayList<Path>(); // four local dirs under basedir, named 0..3
+    String[] sDirs = new String[4];
+    for (int i = 0; i < 4; ++i) {
+      localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
+      sDirs[i] = localDirs.get(i).toString();
+    }
+    conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+
+    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService(); // shared by both service instances below
+    stateStore.init(conf);
+    stateStore.start();
+    DrainDispatcher dispatcher = new DrainDispatcher();
+    dispatcher.init(conf);
+    dispatcher.start();
+    EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
+    dispatcher.register(ApplicationEventType.class, applicationBus);
+    EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
+    dispatcher.register(ContainerEventType.class, containerBus);
+    // Ignore actual localization: localizer events are routed to a mock handler
+    EventHandler<LocalizerEvent> localizerBus = mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerBus);
+
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
+
+    ResourceLocalizationService spyService =
+        createSpyService(dispatcher, dirsHandler, stateStore);
+    try {
+      spyService.init(conf);
+      spyService.start();
+
+      final Application app1 = mock(Application.class);
+      when(app1.getUser()).thenReturn(user1);
+      when(app1.getAppId()).thenReturn(appId1);
+      final Application app2 = mock(Application.class);
+      when(app2.getUser()).thenReturn(user2);
+      when(app2.getAppId()).thenReturn(appId2);
+      spyService.handle(new ApplicationLocalizationEvent(
+          LocalizationEventType.INIT_APPLICATION_RESOURCES, app1));
+      spyService.handle(new ApplicationLocalizationEvent(
+          LocalizationEventType.INIT_APPLICATION_RESOURCES, app2));
+      dispatcher.await();
+
+      // Get a handle on the trackers after they're set up with INIT_APPLICATION_RESOURCES
+      LocalResourcesTracker appTracker1 =
+          spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user1, appId1);
+      LocalResourcesTracker privTracker1 =
+          spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE,
+              user1, null);
+      LocalResourcesTracker appTracker2 =
+          spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user2, appId2);
+      LocalResourcesTracker pubTracker =
+          spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC,
+              null, null);
+
+      // init containers
+      final Container c1 = getMockContainer(appId1, 1, user1);
+      final Container c2 = getMockContainer(appId2, 2, user2);
+
+      // init resources
+      Random r = new Random();
+      long seed = r.nextLong();
+      System.out.println("SEED: " + seed); // presumably logged so a failing run can be reproduced
+      r.setSeed(seed);
+
+      // Send localization requests of each type.
+      final LocalResource privResource1 = getPrivateMockedResource(r);
+      final LocalResourceRequest privReq1 =
+          new LocalResourceRequest(privResource1);
+      final LocalResource privResource2 = getPrivateMockedResource(r);
+      final LocalResourceRequest privReq2 =
+          new LocalResourceRequest(privResource2);
+
+      final LocalResource pubResource1 = getPublicMockedResource(r);
+      final LocalResourceRequest pubReq1 =
+          new LocalResourceRequest(pubResource1);
+      final LocalResource pubResource2 = getPublicMockedResource(r);
+      final LocalResourceRequest pubReq2 =
+          new LocalResourceRequest(pubResource2);
+
+      final LocalResource appResource1 = getAppMockedResource(r);
+      final LocalResourceRequest appReq1 =
+          new LocalResourceRequest(appResource1);
+      final LocalResource appResource2 = getAppMockedResource(r);
+      final LocalResourceRequest appReq2 =
+          new LocalResourceRequest(appResource2);
+      final LocalResource appResource3 = getAppMockedResource(r);
+      final LocalResourceRequest appReq3 =
+          new LocalResourceRequest(appResource3);
+
+      Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req1 = // for c1: two private, one public, one app resource
+          new HashMap<LocalResourceVisibility,
+                      Collection<LocalResourceRequest>>();
+      req1.put(LocalResourceVisibility.PRIVATE,
+          Arrays.asList(new LocalResourceRequest[] { privReq1, privReq2 }));
+      req1.put(LocalResourceVisibility.PUBLIC,
+          Collections.singletonList(pubReq1));
+      req1.put(LocalResourceVisibility.APPLICATION,
+          Collections.singletonList(appReq1));
+
+      Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req2 = // for c2: two app resources, one public
+        new HashMap<LocalResourceVisibility,
+                    Collection<LocalResourceRequest>>();
+      req2.put(LocalResourceVisibility.APPLICATION,
+          Arrays.asList(new LocalResourceRequest[] { appReq2, appReq3 }));
+      req2.put(LocalResourceVisibility.PUBLIC,
+          Collections.singletonList(pubReq2));
+
+      // Send Request event
+      spyService.handle(new ContainerLocalizationRequestEvent(c1, req1));
+      spyService.handle(new ContainerLocalizationRequestEvent(c2, req2));
+      dispatcher.await();
+
+      // Simulate start of localization for all resources
+      privTracker1.getPathForLocalization(privReq1,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.USERCACHE + user1));
+      privTracker1.getPathForLocalization(privReq2,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.USERCACHE + user1));
+      LocalizedResource privLr1 = privTracker1.getLocalizedResource(privReq1);
+      LocalizedResource privLr2 = privTracker1.getLocalizedResource(privReq2);
+      appTracker1.getPathForLocalization(appReq1,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.APPCACHE + appId1));
+      LocalizedResource appLr1 = appTracker1.getLocalizedResource(appReq1);
+      appTracker2.getPathForLocalization(appReq2,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.APPCACHE + appId2));
+      LocalizedResource appLr2 = appTracker2.getLocalizedResource(appReq2);
+      appTracker2.getPathForLocalization(appReq3,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.APPCACHE + appId2));
+      LocalizedResource appLr3 = appTracker2.getLocalizedResource(appReq3);
+      pubTracker.getPathForLocalization(pubReq1,
+          dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE));
+      LocalizedResource pubLr1 = pubTracker.getLocalizedResource(pubReq1);
+      pubTracker.getPathForLocalization(pubReq2,
+          dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE));
+      LocalizedResource pubLr2 = pubTracker.getLocalizedResource(pubReq2);
+
+      // Simulate completion of localization for most resources with
+      // possibly different sizes than in the request; appReq2 gets no completion event
+      assertNotNull("Localization not started", privLr1.getLocalPath());
+      privTracker1.handle(new ResourceLocalizedEvent(privReq1,
+          privLr1.getLocalPath(), privLr1.getSize() + 5));
+      assertNotNull("Localization not started", privLr2.getLocalPath());
+      privTracker1.handle(new ResourceLocalizedEvent(privReq2,
+          privLr2.getLocalPath(), privLr2.getSize() + 10));
+      assertNotNull("Localization not started", appLr1.getLocalPath());
+      appTracker1.handle(new ResourceLocalizedEvent(appReq1,
+          appLr1.getLocalPath(), appLr1.getSize()));
+      assertNotNull("Localization not started", appLr3.getLocalPath());
+      appTracker2.handle(new ResourceLocalizedEvent(appReq3,
+          appLr3.getLocalPath(), appLr3.getSize() + 7));
+      assertNotNull("Localization not started", pubLr1.getLocalPath());
+      pubTracker.handle(new ResourceLocalizedEvent(pubReq1,
+          pubLr1.getLocalPath(), pubLr1.getSize() + 1000));
+      assertNotNull("Localization not started", pubLr2.getLocalPath());
+      pubTracker.handle(new ResourceLocalizedEvent(pubReq2,
+          pubLr2.getLocalPath(), pubLr2.getSize() + 99999));
+
+      dispatcher.await();
+      assertEquals(ResourceState.LOCALIZED, privLr1.getState());
+      assertEquals(ResourceState.LOCALIZED, privLr2.getState());
+      assertEquals(ResourceState.LOCALIZED, appLr1.getState());
+      assertEquals(ResourceState.DOWNLOADING, appLr2.getState()); // the one resource without a ResourceLocalizedEvent
+      assertEquals(ResourceState.LOCALIZED, appLr3.getState());
+      assertEquals(ResourceState.LOCALIZED, pubLr1.getState());
+      assertEquals(ResourceState.LOCALIZED, pubLr2.getState());
+
+      // restart and recover: a new service instance on the same dispatcher, dirsHandler and stateStore (init only, no start)
+      spyService = createSpyService(dispatcher, dirsHandler, stateStore);
+      spyService.init(conf);
+      spyService.recoverLocalizedResources(
+          stateStore.loadLocalizationState());
+      dispatcher.await();
+
+      appTracker1 = spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user1, appId1);
+      privTracker1 = spyService.getLocalResourcesTracker(
+          LocalResourceVisibility.PRIVATE, user1, null);
+      appTracker2 = spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user2, appId2);
+      pubTracker = spyService.getLocalResourcesTracker(
+          LocalResourceVisibility.PUBLIC, null, null); // NOTE(review): re-fetched, but no public resource is asserted below
+
+      LocalizedResource recoveredRsrc =
+          privTracker1.getLocalizedResource(privReq1);
+      assertEquals(privReq1, recoveredRsrc.getRequest());
+      assertEquals(privLr1.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(privLr1.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+      recoveredRsrc = privTracker1.getLocalizedResource(privReq2);
+      assertEquals(privReq2, recoveredRsrc.getRequest());
+      assertEquals(privLr2.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(privLr2.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+      recoveredRsrc = appTracker1.getLocalizedResource(appReq1);
+      assertEquals(appReq1, recoveredRsrc.getRequest());
+      assertEquals(appLr1.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(appLr1.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+      recoveredRsrc = appTracker2.getLocalizedResource(appReq2); // was still DOWNLOADING before the restart
+      assertNull("in-progress resource should not be present", recoveredRsrc);
+      recoveredRsrc = appTracker2.getLocalizedResource(appReq3);
+      assertEquals(appReq3, recoveredRsrc.getRequest());
+      assertEquals(appLr3.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(appLr3.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+    } finally {
+      dispatcher.stop(); // NOTE(review): neither spyService instance is stopped in this method
+      stateStore.close();
+    }
+  }
+
   @Test( timeout = 10000)
   @SuppressWarnings("unchecked") // mocked generics
   public void testLocalizationHeartbeat() throws Exception {
@@ -436,7 +671,8 @@ public class TestResourceLocalizationService {
 
     ResourceLocalizationService rawService =
       new ResourceLocalizationService(dispatcher, exec, delService,
-                                      dirsHandler);
+                                      dirsHandler,
+                                      new NMNullStateStoreService());
     ResourceLocalizationService spyService = spy(rawService);
     doReturn(mockServer).when(spyService).createServer();
     doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
@@ -469,7 +705,7 @@ public class TestResourceLocalizationService {
       long seed = r.nextLong();
       System.out.println("SEED: " + seed);
       r.setSeed(seed);
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, "user0");
       FSDataOutputStream out =
         new FSDataOutputStream(new DataOutputBuffer(), null);
       doReturn(out).when(spylfs).createInternal(isA(Path.class),
@@ -616,7 +852,8 @@ public class TestResourceLocalizationService {
     try {
       ResourceLocalizationService rawService =
           new ResourceLocalizationService(dispatcher, exec, delService,
-                                        dirsHandler);
+                                        dirsHandler,
+                                        new NMNullStateStoreService());
       ResourceLocalizationService spyService = spy(rawService);
       doReturn(mockServer).when(spyService).createServer();
       doReturn(lfs).when(spyService).getLocalFileContext(
@@ -637,7 +874,7 @@ public class TestResourceLocalizationService {
       dispatcher.await();
 
       // init container.
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, user);
 
       // init resources
       Random r = new Random();
@@ -725,7 +962,7 @@ public class TestResourceLocalizationService {
     try {
       ResourceLocalizationService rawService =
           new ResourceLocalizationService(dispatcher, exec, delService,
-            dirsHandlerSpy);
+            dirsHandlerSpy, new NMNullStateStoreService());
       ResourceLocalizationService spyService = spy(rawService);
       doReturn(mockServer).when(spyService).createServer();
       doReturn(lfs).when(spyService).getLocalFileContext(
@@ -758,7 +995,7 @@ public class TestResourceLocalizationService {
         .put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq));
 
       // init container.
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, user);
 
       // first test ioexception
       Mockito
@@ -838,7 +1075,7 @@ public class TestResourceLocalizationService {
 
       ResourceLocalizationService rls =
           new ResourceLocalizationService(dispatcher1, exec, delService,
-            localDirHandler);
+            localDirHandler, new NMNullStateStoreService());
       dispatcher1.register(LocalizationEventType.class, rls);
       rls.init(conf);
 
@@ -991,7 +1228,7 @@ public class TestResourceLocalizationService {
 
       ResourceLocalizationService rls =
           new ResourceLocalizationService(dispatcher1, exec, delService,
-            localDirHandler);
+            localDirHandler, new NMNullStateStoreService());
       dispatcher1.register(LocalizationEventType.class, rls);
       rls.init(conf);
 
@@ -1157,7 +1394,7 @@ public class TestResourceLocalizationService {
       // it as otherwise it will remove requests from pending queue.
       ResourceLocalizationService rawService =
           new ResourceLocalizationService(dispatcher1, exec, delService,
-            dirsHandler);
+            dirsHandler, new NMNullStateStoreService());
       ResourceLocalizationService spyService = spy(rawService);
       dispatcher1.register(LocalizationEventType.class, spyService);
       spyService.init(conf);
@@ -1424,12 +1661,13 @@ public class TestResourceLocalizationService {
     return getMockedResource(r, LocalResourceVisibility.PRIVATE);
   }
 
-  private static Container getMockContainer(ApplicationId appId, int id) {
+  private static Container getMockContainer(ApplicationId appId, int id,
+      String user) {
     Container c = mock(Container.class);
     ApplicationAttemptId appAttemptId =
         BuilderUtils.newApplicationAttemptId(appId, 1);
     ContainerId cId = BuilderUtils.newContainerId(appAttemptId, id);
-    when(c.getUser()).thenReturn("user0");
+    when(c.getUser()).thenReturn(user);
     when(c.getContainerId()).thenReturn(cId);
     Credentials creds = new Credentials();
     creds.addToken(new Text("tok" + id), getToken(id));
@@ -1438,6 +1676,24 @@ public class TestResourceLocalizationService {
     return c;
   }
 
+  private ResourceLocalizationService createSpyService( // builds a spied service with mocked collaborators and three stubbed factory methods
+      DrainDispatcher dispatcher, LocalDirsHandlerService dirsHandler,
+      NMStateStoreService stateStore) {
+    ContainerExecutor exec = mock(ContainerExecutor.class);
+    LocalizerTracker mockLocalizerTracker = mock(LocalizerTracker.class);
+    DeletionService delService = mock(DeletionService.class);
+    ResourceLocalizationService rawService =
+      new ResourceLocalizationService(dispatcher, exec, delService,
+                                      dirsHandler, stateStore);
+    ResourceLocalizationService spyService = spy(rawService);
+    doReturn(mockServer).when(spyService).createServer(); // mockServer is declared outside this method
+    doReturn(mockLocalizerTracker).when(spyService).createLocalizerTracker(
+        isA(Configuration.class));
+    doReturn(lfs).when(spyService) // lfs is likewise declared outside this method
+        .getLocalFileContext(isA(Configuration.class));
+    return spyService;
+  }
+
   @SuppressWarnings({ "unchecked", "rawtypes" })
   static Token<? extends TokenIdentifier> getToken(int id) {
     return new Token(("ident" + id).getBytes(), ("passwd" + id).getBytes(),

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java

@@ -26,11 +26,13 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
-
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.junit.Test;
+
 import static org.junit.Assert.*;
 
 import org.mockito.ArgumentCaptor;
+
 import static org.mockito.Mockito.*;
 
 public class TestResourceRetention {
@@ -81,7 +83,7 @@ public class TestResourceRetention {
     ConcurrentMap<LocalResourceRequest,LocalizedResource> trackerResources =
       new ConcurrentHashMap<LocalResourceRequest,LocalizedResource>();
     LocalResourcesTracker ret = spy(new LocalResourcesTrackerImpl(user, null,
-          trackerResources, false, conf));
+      null, trackerResources, false, conf, new NMNullStateStoreService()));
     for (int i = 0; i < nRsrcs; ++i) {
       final LocalResourceRequest req = new LocalResourceRequest(
           new Path("file:///" + user + "/rsrc" + i), timestamp + i * tsstep,

+ 171 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java

@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+
+public class NMMemoryStateStoreService extends NMStateStoreService { // keeps localization state in in-memory HashMaps
+  private Map<TrackerKey, TrackerState> trackerStates; // assigned in initStorage(); null until then
+
+  public NMMemoryStateStoreService() {
+    super(NMMemoryStateStoreService.class.getName());
+  }
+
+  private LocalResourceTrackerState loadTrackerState(TrackerState ts) { // copies one tracker's stored state into the returned form
+    LocalResourceTrackerState result = new LocalResourceTrackerState();
+    result.localizedResources.addAll(ts.localizedResources.values());
+    for (Map.Entry<Path, LocalResourceProto> entry :
+         ts.inProgressMap.entrySet()) {
+      result.inProgressResources.put(entry.getValue(), entry.getKey()); // stored as path -> proto, returned as proto -> path
+    }
+    return result;
+  }
+
+  private TrackerState getTrackerState(TrackerKey key) { // get-or-create
+    TrackerState ts = trackerStates.get(key);
+    if (ts == null) {
+      ts = new TrackerState();
+      trackerStates.put(key, ts);
+    }
+    return ts;
+  }
+
+  @Override
+  public synchronized RecoveredLocalizationState loadLocalizationState() {
+    RecoveredLocalizationState result = new RecoveredLocalizationState();
+    for (Map.Entry<TrackerKey, TrackerState> e : trackerStates.entrySet()) {
+      TrackerKey tk = e.getKey();
+      TrackerState ts = e.getValue();
+      // check what kind of tracker state we have and recover appropriately
+      // public trackers have user == null
+      // private trackers have a valid user but appId == null
+      // app-specific trackers have a valid user and valid appId
+      if (tk.user == null) {
+        result.publicTrackerState = loadTrackerState(ts);
+      } else {
+        RecoveredUserResources rur = result.userResources.get(tk.user); // one entry per user, created on first sight
+        if (rur == null) {
+          rur = new RecoveredUserResources();
+          result.userResources.put(tk.user, rur);
+        }
+        if (tk.appId == null) {
+          rur.privateTrackerState = loadTrackerState(ts);
+        } else {
+          rur.appTrackerStates.put(tk.appId, loadTrackerState(ts));
+        }
+      }
+    }
+    return result;
+  }
+
+  @Override
+  public synchronized void startResourceLocalization(String user,
+      ApplicationId appId, LocalResourceProto proto, Path localPath) {
+    TrackerState ts = getTrackerState(new TrackerKey(user, appId));
+    ts.inProgressMap.put(localPath, proto); // recorded as in progress under its local path
+  }
+
+  @Override
+  public synchronized void finishResourceLocalization(String user,
+      ApplicationId appId, LocalizedResourceProto proto) {
+    TrackerState ts = getTrackerState(new TrackerKey(user, appId));
+    Path localPath = new Path(proto.getLocalPath());
+    ts.inProgressMap.remove(localPath); // moves from in-progress to localized
+    ts.localizedResources.put(localPath, proto);
+  }
+
+  @Override
+  public synchronized void removeLocalizedResource(String user,
+      ApplicationId appId, Path localPath) {
+    TrackerState ts = trackerStates.get(new TrackerKey(user, appId)); // plain lookup, so removal never creates a tracker entry
+    if (ts != null) {
+      ts.inProgressMap.remove(localPath);
+      ts.localizedResources.remove(localPath);
+    }
+  }
+
+  @Override
+  protected void initStorage(Configuration conf) { // conf is not read here
+    trackerStates = new HashMap<TrackerKey, TrackerState>();
+  }
+
+  @Override
+  protected void startStorage() { // no-op
+  }
+
+  @Override
+  protected void closeStorage() { // no-op
+  }
+
+
+  private static class TrackerState { // per-tracker bookkeeping; both maps are keyed by local path
+    Map<Path, LocalResourceProto> inProgressMap =
+        new HashMap<Path, LocalResourceProto>();
+    Map<Path, LocalizedResourceProto> localizedResources =
+        new HashMap<Path, LocalizedResourceProto>();
+  }
+
+  private static class TrackerKey { // map key built from (user, appId); either part may be null
+    String user; // NOTE(review): not final although instances are used as HashMap keys
+    ApplicationId appId;
+
+    public TrackerKey(String user, ApplicationId appId) {
+      this.user = user;
+      this.appId = appId;
+    }
+
+    @Override
+    public int hashCode() { // combines both fields, treating null as 0
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + ((appId == null) ? 0 : appId.hashCode());
+      result = prime * result + ((user == null) ? 0 : user.hashCode());
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) { // null-safe comparison of appId, then user
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (!(obj instanceof TrackerKey))
+        return false;
+      TrackerKey other = (TrackerKey) obj;
+      if (appId == null) {
+        if (other.appId != null)
+          return false;
+      } else if (!appId.equals(other.appId))
+        return false;
+      if (user == null) {
+        if (other.user != null)
+          return false;
+      } else if (!user.equals(other.user))
+        return false;
+      return true;
+    }
+  }
+}

+ 407 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java

@@ -0,0 +1,407 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredLocalizationState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredUserResources;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestNMLeveldbStateStoreService {
+  private static final File TMP_DIR = new File(
+      System.getProperty("test.build.data",
+          System.getProperty("java.io.tmpdir")),
+      TestNMLeveldbStateStoreService.class.getName());
+
+  YarnConfiguration conf;
+  NMLeveldbStateStoreService stateStore;
+
+  /**
+   * Runs before each test: wipes the scratch directory, builds a
+   * configuration with recovery enabled and pointed at that directory, and
+   * brings up a fresh state store.
+   */
+  @Before
+  public void setup() throws IOException {
+    FileUtil.fullyDelete(TMP_DIR);
+    final YarnConfiguration recoveryConf = new YarnConfiguration();
+    recoveryConf.set(YarnConfiguration.NM_RECOVERY_DIR, TMP_DIR.toString());
+    recoveryConf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf = recoveryConf;
+    restartStateStore();
+  }
+
+  /**
+   * Runs after each test: closes the state store if one was created and
+   * removes the scratch directory. The directory is removed in a finally
+   * block so that a failure while closing the store does not leave test
+   * data behind for later tests.
+   *
+   * @throws IOException if closing the state store fails
+   */
+  @After
+  public void cleanup() throws IOException {
+    try {
+      if (stateStore != null) {
+        stateStore.close();
+      }
+    } finally {
+      FileUtil.fullyDelete(TMP_DIR);
+    }
+  }
+
+  /**
+   * Swaps the current state store for a newly created one that is
+   * initialized with the test configuration and then started.
+   */
+  private void restartStateStore() throws IOException {
+    if (this.stateStore != null) {
+      // leveldb keeps the database locked until the old store is closed
+      this.stateStore.close();
+    }
+    this.stateStore = new NMLeveldbStateStoreService();
+    this.stateStore.init(this.conf);
+    this.stateStore.start();
+  }
+
+  /**
+   * Loads the localization state and asserts that it is present but holds
+   * nothing: no public localized or in-progress resources and no per-user
+   * resources.
+   */
+  private void verifyEmptyState() throws IOException {
+    RecoveredLocalizationState recovered =
+        stateStore.loadLocalizationState();
+    assertNotNull(recovered);
+    LocalResourceTrackerState publicTracker =
+        recovered.getPublicTrackerState();
+    assertNotNull(publicTracker);
+    assertTrue(publicTracker.getLocalizedResources().isEmpty());
+    assertTrue(publicTracker.getInProgressResources().isEmpty());
+    assertTrue(recovered.getUserResources().isEmpty());
+  }
+
+  /**
+   * Checks that a newly started store reports it can recover and that the
+   * localization state it loads contains no resources.
+   */
+  @Test
+  public void testEmptyState() throws IOException {
+    assertTrue(stateStore.canRecover());
+    verifyEmptyState();
+  }
+
+  /**
+   * Verifies that resources recorded with startResourceLocalization are
+   * reported as in-progress, and not as localized, after the state store is
+   * restarted. Covers an application resource first, then public and
+   * private resources added on top of it.
+   */
+  @Test
+  public void testStartResourceLocalization() throws IOException {
+    String user = "somebody";
+    ApplicationId appId = ApplicationId.newInstance(1, 1);
+
+    // start a local resource for an application
+    Path appRsrcPath = new Path("hdfs://some/app/resource");
+    LocalResourcePBImpl rsrcPb = (LocalResourcePBImpl)
+        LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(appRsrcPath),
+            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION,
+            123L, 456L);
+    LocalResourceProto appRsrcProto = rsrcPb.getProto();
+    Path appRsrcLocalPath = new Path("/some/local/dir/for/apprsrc");
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+
+    // restart and verify only app resource is marked in-progress
+    restartStateStore();
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    Map<String, RecoveredUserResources> userResources =
+        state.getUserResources();
+    assertEquals(1, userResources.size());
+    RecoveredUserResources rur = userResources.get(user);
+    LocalResourceTrackerState privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertTrue(privts.getLocalizedResources().isEmpty());
+    assertTrue(privts.getInProgressResources().isEmpty());
+    assertEquals(1, rur.getAppTrackerStates().size());
+    LocalResourceTrackerState appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getLocalizedResources().isEmpty());
+    assertEquals(1, appts.getInProgressResources().size());
+    // in-progress entries are looked up by resource proto and yield the
+    // local path that was recorded for them
+    assertEquals(appRsrcLocalPath,
+        appts.getInProgressResources().get(appRsrcProto));
+
+    // start some public and private resources
+    Path pubRsrcPath1 = new Path("hdfs://some/public/resource1");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath1),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto1 = rsrcPb.getProto();
+    Path pubRsrcLocalPath1 = new Path("/some/local/dir/for/pubrsrc1");
+    // null user and null appId: asserted below to show up in the public
+    // tracker state
+    stateStore.startResourceLocalization(null, null, pubRsrcProto1,
+        pubRsrcLocalPath1);
+    Path pubRsrcPath2 = new Path("hdfs://some/public/resource2");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath2),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto2 = rsrcPb.getProto();
+    Path pubRsrcLocalPath2 = new Path("/some/local/dir/for/pubrsrc2");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto2,
+        pubRsrcLocalPath2);
+    Path privRsrcPath = new Path("hdfs://some/private/resource");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(privRsrcPath),
+            LocalResourceType.PATTERN, LocalResourceVisibility.PRIVATE,
+            789L, 680L, "*pattern*");
+    LocalResourceProto privRsrcProto = rsrcPb.getProto();
+    Path privRsrcLocalPath = new Path("/some/local/dir/for/privrsrc");
+    // user with a null appId: asserted below to show up in that user's
+    // private tracker state
+    stateStore.startResourceLocalization(user, null, privRsrcProto,
+        privRsrcLocalPath);
+
+    // restart and verify resources are marked in-progress
+    restartStateStore();
+    state = stateStore.loadLocalizationState();
+    pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertEquals(2, pubts.getInProgressResources().size());
+    assertEquals(pubRsrcLocalPath1,
+        pubts.getInProgressResources().get(pubRsrcProto1));
+    assertEquals(pubRsrcLocalPath2,
+        pubts.getInProgressResources().get(pubRsrcProto2));
+    userResources = state.getUserResources();
+    assertEquals(1, userResources.size());
+    rur = userResources.get(user);
+    privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertTrue(privts.getLocalizedResources().isEmpty());
+    assertEquals(1, privts.getInProgressResources().size());
+    assertEquals(privRsrcLocalPath,
+        privts.getInProgressResources().get(privRsrcProto));
+    // the application resource started earlier must still be in progress
+    assertEquals(1, rur.getAppTrackerStates().size());
+    appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getLocalizedResources().isEmpty());
+    assertEquals(1, appts.getInProgressResources().size());
+    assertEquals(appRsrcLocalPath,
+        appts.getInProgressResources().get(appRsrcProto));
+  }
+
+  /**
+   * Verifies that a resource passed to finishResourceLocalization is
+   * reported as localized, and no longer as in-progress, after the state
+   * store is restarted, while resources that were only started remain
+   * in-progress. Covers application, public and private resources.
+   */
+  @Test
+  public void testFinishResourceLocalization() throws IOException {
+    String user = "somebody";
+    ApplicationId appId = ApplicationId.newInstance(1, 1);
+
+    // start and finish a local resource for an application
+    Path appRsrcPath = new Path("hdfs://some/app/resource");
+    LocalResourcePBImpl rsrcPb = (LocalResourcePBImpl)
+        LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(appRsrcPath),
+            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION,
+            123L, 456L);
+    LocalResourceProto appRsrcProto = rsrcPb.getProto();
+    Path appRsrcLocalPath = new Path("/some/local/dir/for/apprsrc");
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+    LocalizedResourceProto appLocalizedProto =
+        LocalizedResourceProto.newBuilder()
+          .setResource(appRsrcProto)
+          .setLocalPath(appRsrcLocalPath.toString())
+          .setSize(1234567L)
+          .build();
+    stateStore.finishResourceLocalization(user, appId, appLocalizedProto);
+
+    // restart and verify only app resource is completed
+    restartStateStore();
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    Map<String, RecoveredUserResources> userResources =
+        state.getUserResources();
+    assertEquals(1, userResources.size());
+    RecoveredUserResources rur = userResources.get(user);
+    LocalResourceTrackerState privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertTrue(privts.getLocalizedResources().isEmpty());
+    assertTrue(privts.getInProgressResources().isEmpty());
+    assertEquals(1, rur.getAppTrackerStates().size());
+    LocalResourceTrackerState appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getInProgressResources().isEmpty());
+    assertEquals(1, appts.getLocalizedResources().size());
+    assertEquals(appLocalizedProto,
+        appts.getLocalizedResources().iterator().next());
+
+    // start some public and private resources
+    Path pubRsrcPath1 = new Path("hdfs://some/public/resource1");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath1),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto1 = rsrcPb.getProto();
+    Path pubRsrcLocalPath1 = new Path("/some/local/dir/for/pubrsrc1");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto1,
+        pubRsrcLocalPath1);
+    Path pubRsrcPath2 = new Path("hdfs://some/public/resource2");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath2),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto2 = rsrcPb.getProto();
+    Path pubRsrcLocalPath2 = new Path("/some/local/dir/for/pubrsrc2");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto2,
+        pubRsrcLocalPath2);
+    Path privRsrcPath = new Path("hdfs://some/private/resource");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(privRsrcPath),
+            LocalResourceType.PATTERN, LocalResourceVisibility.PRIVATE,
+            789L, 680L, "*pattern*");
+    LocalResourceProto privRsrcProto = rsrcPb.getProto();
+    Path privRsrcLocalPath = new Path("/some/local/dir/for/privrsrc");
+    stateStore.startResourceLocalization(user, null, privRsrcProto,
+        privRsrcLocalPath);
+
+    // finish some of the resources
+    // (public resource 2 is deliberately left unfinished so that it is
+    // still reported as in-progress after the restart)
+    LocalizedResourceProto pubLocalizedProto1 =
+        LocalizedResourceProto.newBuilder()
+          .setResource(pubRsrcProto1)
+          .setLocalPath(pubRsrcLocalPath1.toString())
+          .setSize(pubRsrcProto1.getSize())
+          .build();
+    stateStore.finishResourceLocalization(null, null, pubLocalizedProto1);
+    LocalizedResourceProto privLocalizedProto =
+        LocalizedResourceProto.newBuilder()
+          .setResource(privRsrcProto)
+          .setLocalPath(privRsrcLocalPath.toString())
+          .setSize(privRsrcProto.getSize())
+          .build();
+    stateStore.finishResourceLocalization(user, null, privLocalizedProto);
+
+    // restart and verify state
+    restartStateStore();
+    state = stateStore.loadLocalizationState();
+    pubts = state.getPublicTrackerState();
+    assertEquals(1, pubts.getLocalizedResources().size());
+    assertEquals(pubLocalizedProto1,
+        pubts.getLocalizedResources().iterator().next());
+    assertEquals(1, pubts.getInProgressResources().size());
+    assertEquals(pubRsrcLocalPath2,
+        pubts.getInProgressResources().get(pubRsrcProto2));
+    userResources = state.getUserResources();
+    assertEquals(1, userResources.size());
+    rur = userResources.get(user);
+    privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertEquals(1, privts.getLocalizedResources().size());
+    assertEquals(privLocalizedProto,
+        privts.getLocalizedResources().iterator().next());
+    assertTrue(privts.getInProgressResources().isEmpty());
+    assertEquals(1, rur.getAppTrackerStates().size());
+    appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getInProgressResources().isEmpty());
+    assertEquals(1, appts.getLocalizedResources().size());
+    assertEquals(appLocalizedProto,
+        appts.getLocalizedResources().iterator().next());
+  }
+
+  /**
+   * Verifies that removeLocalizedResource erases a resource from the
+   * recovered state whether or not its localization had finished, and that
+   * resources which were not removed survive a restart of the state store.
+   */
+  @Test
+  public void testRemoveLocalizedResource() throws IOException {
+    String user = "somebody";
+    ApplicationId appId = ApplicationId.newInstance(1, 1);
+
+    // go through the complete lifecycle for an application local resource
+    Path appRsrcPath = new Path("hdfs://some/app/resource");
+    LocalResourcePBImpl rsrcPb = (LocalResourcePBImpl)
+        LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(appRsrcPath),
+            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION,
+            123L, 456L);
+    LocalResourceProto appRsrcProto = rsrcPb.getProto();
+    Path appRsrcLocalPath = new Path("/some/local/dir/for/apprsrc");
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+    LocalizedResourceProto appLocalizedProto =
+        LocalizedResourceProto.newBuilder()
+          .setResource(appRsrcProto)
+          .setLocalPath(appRsrcLocalPath.toString())
+          .setSize(1234567L)
+          .build();
+    stateStore.finishResourceLocalization(user, appId, appLocalizedProto);
+    stateStore.removeLocalizedResource(user, appId, appRsrcLocalPath);
+
+    restartStateStore();
+    verifyEmptyState();
+
+    // remove an app resource that didn't finish
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+    stateStore.removeLocalizedResource(user, appId, appRsrcLocalPath);
+
+    restartStateStore();
+    verifyEmptyState();
+
+    // add public and private resources and remove some
+    // (public resource 1 is finished and kept; public resource 2 is finished
+    // and removed; the private resource is removed before it finishes)
+    Path pubRsrcPath1 = new Path("hdfs://some/public/resource1");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath1),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto1 = rsrcPb.getProto();
+    Path pubRsrcLocalPath1 = new Path("/some/local/dir/for/pubrsrc1");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto1,
+        pubRsrcLocalPath1);
+    LocalizedResourceProto pubLocalizedProto1 =
+        LocalizedResourceProto.newBuilder()
+          .setResource(pubRsrcProto1)
+          .setLocalPath(pubRsrcLocalPath1.toString())
+          .setSize(789L)
+          .build();
+    stateStore.finishResourceLocalization(null, null, pubLocalizedProto1);
+    Path pubRsrcPath2 = new Path("hdfs://some/public/resource2");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath2),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto2 = rsrcPb.getProto();
+    Path pubRsrcLocalPath2 = new Path("/some/local/dir/for/pubrsrc2");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto2,
+        pubRsrcLocalPath2);
+    LocalizedResourceProto pubLocalizedProto2 =
+        LocalizedResourceProto.newBuilder()
+          .setResource(pubRsrcProto2)
+          .setLocalPath(pubRsrcLocalPath2.toString())
+          .setSize(7654321L)
+          .build();
+    stateStore.finishResourceLocalization(null, null, pubLocalizedProto2);
+    stateStore.removeLocalizedResource(null, null, pubRsrcLocalPath2);
+    Path privRsrcPath = new Path("hdfs://some/private/resource");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(privRsrcPath),
+            LocalResourceType.PATTERN, LocalResourceVisibility.PRIVATE,
+            789L, 680L, "*pattern*");
+    LocalResourceProto privRsrcProto = rsrcPb.getProto();
+    Path privRsrcLocalPath = new Path("/some/local/dir/for/privrsrc");
+    stateStore.startResourceLocalization(user, null, privRsrcProto,
+        privRsrcLocalPath);
+    stateStore.removeLocalizedResource(user, null, privRsrcLocalPath);
+
+    // restart and verify state
+    restartStateStore();
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    assertEquals(1, pubts.getLocalizedResources().size());
+    assertEquals(pubLocalizedProto1,
+        pubts.getLocalizedResources().iterator().next());
+    Map<String, RecoveredUserResources> userResources =
+        state.getUserResources();
+    // every user-scoped resource was removed, so no user entry is expected
+    assertTrue(userResources.isEmpty());
+  }
+}

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.ContainerLogsPage.ContainersLogsBlock;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -77,7 +78,8 @@ public class TestContainerLogsPage {
     NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
     healthChecker.init(conf);
     LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
-    NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, new ApplicationACLsManager(conf));
+    NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+        new ApplicationACLsManager(conf), new NMNullStateStoreService());
     // Add an application and the corresponding containers
     RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(conf);
     String user = "nobody";

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java

@@ -49,6 +49,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -77,7 +79,8 @@ public class TestNMWebServer {
   }
   
   private int startNMWebAppServer(String webAddr) {
-    Context nmContext = new NodeManager.NMContext(null, null, null, null);
+    Context nmContext = new NodeManager.NMContext(null, null, null, null,
+        null);
     ResourceView resourceView = new ResourceView() {
       @Override
       public long getVmemAllocatedForContainers() {
@@ -135,7 +138,8 @@ public class TestNMWebServer {
 
   @Test
   public void testNMWebApp() throws IOException, YarnException {
-    Context nmContext = new NodeManager.NMContext(null, null, null, null);
+    Context nmContext = new NodeManager.NMContext(null, null, null, null,
+        null);
     ResourceView resourceView = new ResourceView() {
       @Override
       public long getVmemAllocatedForContainers() {
@@ -185,6 +189,7 @@ public class TestNMWebServer {
     ContainerId container2 =
         BuilderUtils.newContainerId(recordFactory, appId, appAttemptId, 1);
     NodeManagerMetrics metrics = mock(NodeManagerMetrics.class);
+    NMStateStoreService stateStore = new NMNullStateStoreService();
     for (ContainerId containerId : new ContainerId[] { container1,
         container2}) {
       // TODO: Use builder utils

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java

@@ -107,7 +107,8 @@ public class TestNMWebServices extends JerseyTest {
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
-      nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager);
+      nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+          aclsManager, null);
       NodeId nodeId = NodeId.newInstance("testhost.foo.com", 8042);
       ((NodeManager.NMContext)nmContext).setNodeId(nodeId);
       resourceView = new ResourceView() {

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java

@@ -99,7 +99,8 @@ public class TestNMWebServicesApps extends JerseyTest {
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
-      nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager);
+      nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+          aclsManager, null);
       NodeId nodeId = NodeId.newInstance("testhost.foo.com", 9999);
       ((NodeManager.NMContext)nmContext).setNodeId(nodeId);
       resourceView = new ResourceView() {

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java

@@ -122,7 +122,8 @@ public class TestNMWebServicesContainers extends JerseyTest {
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
-      nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager) {
+      nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+          aclsManager, null) {
         public NodeId getNodeId() {
           return NodeId.newInstance("testhost.foo.com", 8042);
         };

+ 11 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java

@@ -90,7 +90,9 @@ public class ZKRMStateStore extends RMStateStore {
 
   private String zkHostPort = null;
   private int zkSessionTimeout;
-  private long zkRetryInterval;
+
+  @VisibleForTesting
+  long zkRetryInterval;
   private List<ACL> zkAcl;
   private List<ZKUtil.ZKAuthInfo> zkAuths;
 
@@ -199,9 +201,14 @@ public class ZKRMStateStore extends RMStateStore {
     zkSessionTimeout =
         conf.getInt(YarnConfiguration.RM_ZK_TIMEOUT_MS,
             YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS);
-    zkRetryInterval =
-        conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
-          YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
+
+    if (HAUtil.isHAEnabled(conf)) {
+      zkRetryInterval = zkSessionTimeout / numRetries;
+    } else {
+      zkRetryInterval =
+          conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
+              YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
+    }
 
     zkAcl = RMZKUtils.getZKAcls(conf);
     zkAuths = RMZKUtils.getZKAuths(conf);

+ 23 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java

@@ -41,6 +41,7 @@ import java.security.NoSuchAlgorithmException;
 import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -203,7 +204,7 @@ public class TestZKRMStateStoreZKClientConnections extends
       LOG.error(error, e);
       fail(error);
     }
-    Assert.assertEquals("newBytes", new String(ret));
+    assertEquals("newBytes", new String(ret));
   }
 
   @Test(timeout = 20000)
@@ -232,7 +233,7 @@ public class TestZKRMStateStoreZKClientConnections extends
 
     try {
       byte[] ret = store.getDataWithRetries(path, false);
-      Assert.assertEquals("bytes", new String(ret));
+      assertEquals("bytes", new String(ret));
     } catch (Exception e) {
       String error = "New session creation failed";
       LOG.error(error, e);
@@ -281,4 +282,24 @@ public class TestZKRMStateStoreZKClientConnections extends
 
     zkClientTester.getRMStateStore(conf);
   }
+
+  /**
+   * Verifies how the state store chooses its ZooKeeper retry interval: with
+   * the default configuration it equals the default retry interval, and
+   * with RM HA enabled it equals the default session timeout divided by the
+   * default number of retries.
+   *
+   * Each store is stopped in a finally block so that a failing assertion
+   * does not leave a started store behind.
+   */
+  @Test
+  public void testZKRetryInterval() throws Exception {
+    TestZKClient zkClientTester = new TestZKClient();
+    YarnConfiguration conf = new YarnConfiguration();
+
+    ZKRMStateStore store =
+        (ZKRMStateStore) zkClientTester.getRMStateStore(conf);
+    try {
+      assertEquals(YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS,
+          store.zkRetryInterval);
+    } finally {
+      store.stop();
+    }
+
+    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
+    store =
+        (ZKRMStateStore) zkClientTester.getRMStateStore(conf);
+    try {
+      assertEquals(YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS /
+              YarnConfiguration.DEFAULT_ZK_RM_NUM_RETRIES,
+          store.zkRetryInterval);
+    } finally {
+      store.stop();
+    }
+  }
 }