
merge from trunk r1598783

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/fs-encryption@1598785 13f79535-47bb-0310-9956-ffa450edef68
Charles Lamb
commit 871616b993
81 changed files with 3023 additions and 390 deletions
  1. + 11 - 3    hadoop-common-project/hadoop-common/CHANGES.txt
  2. + 11 - 0    hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh
  3. + 3 - 1     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  4. + 0 - 4     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
  5. + 6 - 0     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
  6. + 11 - 12   hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
  7. + 3 - 6     hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSCacheKeyProvider.java
  8. + 6 - 1     hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java
  9. + 9 - 7     hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java
 10. + 7 - 18    hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/IdUserGroup.java
 11. + 2 - 5     hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java
 12. + 12 - 30   hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java
 13. + 3 - 6     hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/TestNfsExports.java
 14. + 9 - 7     hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/nfs3/TestIdUserGroup.java
 15. + 5 - 0     hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java
 16. + 99 - 0    hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java
 17. + 57 - 0    hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java
 18. + 54 - 0    hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfiguration.java
 19. + 3 - 3     hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/Mountd.java
 20. + 13 - 15   hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java
 21. + 4 - 4     hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java
 22. + 8 - 13    hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java
 23. + 2 - 1     hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java
 24. + 6 - 6     hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java
 25. + 6 - 7     hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java
 26. + 28 - 25   hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
 27. + 13 - 12   hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java
 28. + 2 - 2     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java
 29. + 5 - 4     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java
 30. + 2 - 2     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java
 31. + 6 - 6     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java
 32. + 4 - 4     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestExportsTable.java
 33. + 20 - 20   hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestOpenFileCtxCache.java
 34. + 42 - 0    hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java
 35. + 6 - 6     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java
 36. + 2 - 2     hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/resources/core-site.xml
 37. + 11 - 1    hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
 38. + 0 - 6     hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
 39. + 4 - 0     hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java
 40. + 51 - 3    hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
 41. + 1 - 1     hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js
 42. + 7 - 7     hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
 43. + 12 - 0    hadoop-yarn-project/CHANGES.txt
 44. + 2 - 2     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
 45. + 2 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java
 46. + 17 - 2    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
 47. + 1 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestSubViews.java
 48. + 8 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/view/TestHtmlPage.java
 49. + 5 - 0     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml
 50. + 3 - 0     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
 51. + 49 - 9    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
 52. + 12 - 1    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
 53. + 77 - 5    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java
 54. + 0 - 10    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java
 55. + 156 - 29  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java
 56. + 20 - 3    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java
 57. + 111 - 33  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java
 58. + 3 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java
 59. + 43 - 0    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceRecoveredEvent.java
 60. + 377 - 0   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
 61. + 74 - 0    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
 62. + 163 - 0   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
 63. + 31 - 0    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto
 64. + 2 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
 65. + 3 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
 66. + 30 - 0    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java
 67. + 6 - 2     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
 68. + 3 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
 69. + 47 - 1    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java
 70. + 310 - 8   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java
 71. + 270 - 14  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java
 72. + 4 - 2     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java
 73. + 171 - 0   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
 74. + 407 - 0   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
 75. + 3 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java
 76. + 7 - 2     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
 77. + 2 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java
 78. + 2 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java
 79. + 2 - 1     hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java
 80. + 11 - 4    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
 81. + 23 - 2    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java

+ 11 - 3
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -125,9 +125,6 @@ Trunk (Unreleased)
     HADOOP-10342. Add a new method to UGI to use a Kerberos login subject to
     build a new UGI. (Larry McCay via omalley)
 
-    HADOOP-9968. Makes ProxyUsers to work with NetGroups (Benoy Antony via 
-    ddas)
-
     HADOOP-10237. JavaKeyStoreProvider needs to set keystore permissions 
     correctly. (Larry McCay via omalley)
 
@@ -337,6 +334,11 @@ Trunk (Unreleased)
     HADOOP-10625. Trim configuration names when putting/getting them
     to properties. (Wangda Tan via xgong)
 
+    HADOOP-10645. TestKMS fails because race condition writing acl files. (tucu)
+
+    HADOOP-10611. KMS, keyVersion name should not be assumed to be 
+    keyName@versionNumber. (tucu)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -419,6 +421,9 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10618. Remove SingleNodeSetup.apt.vm. (Akira Ajisaka via
     Arpit Agarwal)
 
+    HADOOP-9968. Makes ProxyUsers to work with NetGroups (Benoy Antony via 
+    ddas)
+
     HADOOP-10448. Support pluggable mechanism to specify proxy user settings.
     (Benoy Antony via Arpit Agarwal)
 
@@ -519,6 +524,9 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10639. FileBasedKeyStoresFactory initialization is not using default
     for SSL_REQUIRE_CLIENT_CERT_KEY. (tucu)
 
+    HADOOP-10638. Updating hadoop-daemon.sh to work as expected when nfs is
+    started as a privileged user. (Manikandan Narayanaswamy via atm)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 11 - 0
hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh

@@ -87,6 +87,14 @@ if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_
   starting_secure_dn="true"
 fi
 
+#Determine if we're starting a privileged NFS, if so, redefine the appropriate variables
+if [ "$command" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
+    export HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
+    export HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
+    export HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
+    starting_privileged_nfs="true"
+fi
+
 if [ "$HADOOP_IDENT_STRING" = "" ]; then
   export HADOOP_IDENT_STRING="$USER"
 fi
@@ -162,6 +170,9 @@ case $startStop in
       echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
       # capture the ulimit info for the appropriate user
       su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
+    elif [ "true" = "$starting_privileged_nfs" ]; then
+        echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
+        su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
     else
       echo "ulimit -a for user $USER" >> $log
       ulimit -a >> $log 2>&1

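For context, the new branch above is driven entirely by environment variables. A rough usage sketch follows (the account name and directory paths are illustrative assumptions, not part of this commit; only the variable names and the nfs3 command come from the script):

    # Hypothetical values; only the variable names are defined by the commit.
    export HADOOP_PRIVILEGED_NFS_USER=nfsserver            # unprivileged account to run as
    export HADOOP_PRIVILEGED_NFS_PID_DIR=/var/run/hadoop-nfs
    export HADOOP_PRIVILEGED_NFS_LOG_DIR=/var/log/hadoop-nfs
    # Run as root so the gateway can bind privileged ports; hadoop-daemon.sh then
    # re-points HADOOP_PID_DIR, HADOOP_LOG_DIR and HADOOP_IDENT_STRING as shown above.
    sudo -E sbin/hadoop-daemon.sh start nfs3
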
+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -423,7 +423,9 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
       new DeprecationDelta("fs.default.name", 
         CommonConfigurationKeys.FS_DEFAULT_NAME_KEY),
       new DeprecationDelta("dfs.umaskmode",
-        CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY)
+        CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY),
+      new DeprecationDelta("dfs.nfs.exports.allowed.hosts",
+          CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY)
     };
 
   /**

+ 0 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java

@@ -517,8 +517,4 @@ public class KMSClientProvider extends KeyProvider {
     // the server should not keep in memory state on behalf of clients either.
   }
 
-  @VisibleForTesting
-  public static String buildVersionName(String name, int version) {
-    return KeyProvider.buildVersionName(name, version);
-  }
 }

+ 6 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -265,4 +265,10 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   public static final boolean RPC_METRICS_QUANTILE_ENABLE_DEFAULT = false;
   public static final String  RPC_METRICS_PERCENTILES_INTERVALS_KEY =
       "rpc.metrics.percentiles.intervals";
+
+  /** Allowed hosts for nfs exports */
+  public static final String NFS_EXPORTS_ALLOWED_HOSTS_SEPARATOR = ";";
+  public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY = "nfs.exports.allowed.hosts";
+  public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT = "* rw";
+
 }

+ 11 - 12
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -1309,18 +1309,17 @@
 </property>
 
 <property>
-  <name>nfs3.server.port</name>
-  <value>2049</value>
-  <description>
-      Specify the port number used by Hadoop NFS.
-  </description>
-</property>
-
-<property>
-  <name>nfs3.mountd.port</name>
-  <value>4242</value>
-  <description>
-      Specify the port number used by Hadoop mount daemon.
+  <name>nfs.exports.allowed.hosts</name>
+  <value>* rw</value>
+  <description>
+    By default, the export can be mounted by any client. The value string 
+    contains machine name and access privilege, separated by whitespace 
+    characters. The machine name format can be a single host, a Java regular 
+    expression, or an IPv4 address. The access privilege uses rw or ro to 
+    specify read/write or read-only access of the machines to exports. If the 
+    access privilege is not provided, the default is read-only. Entries are separated by ";".
+    For example: "192.168.0.0/22 rw ; host.*\.example\.com ; host1.test.org ro;".
+    Only the NFS gateway needs to restart after this property is updated. 
   </description>
 </property>
 

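As a concrete illustration of the new key (the value simply reuses the example hosts from the description above), a site-specific override in core-site.xml might look like:

    <!-- Illustrative core-site.xml entry; host patterns are examples only. -->
    <property>
      <name>nfs.exports.allowed.hosts</name>
      <value>192.168.0.0/22 rw ; host1.test.org ro</value>
    </property>
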
+ 3 - 6
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSCacheKeyProvider.java

@@ -135,14 +135,11 @@ public class KMSCacheKeyProvider extends KeyProvider {
 
   @Override
   public void deleteKey(String name) throws IOException {
-    Metadata metadata = provider.getMetadata(name);
-    List<String> versions = new ArrayList<String>(metadata.getVersions());
-    for (int i = 0; i < metadata.getVersions(); i++) {
-      versions.add(KeyProvider.buildVersionName(name, i));
-    }
     provider.deleteKey(name);
     currentKeyCache.invalidate(name);
-    keyVersionCache.invalidateAll(versions);
+    // invalidating all key versions as we don't know which ones belonged to the
+    // deleted key
+    keyVersionCache.invalidateAll();
   }
 
   @Override

+ 6 - 1
hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java

@@ -577,7 +577,9 @@ public class TestKMS {
               Assert.fail(ex.toString());
             }
             try {
-              kp.getKeyVersion(KMSClientProvider.buildVersionName("k", 0));
+              // we are using JavaKeyStoreProvider for testing, so we know how
+              // the keyversion is created.
+              kp.getKeyVersion("k@0");
               Assert.fail();
             } catch (AuthorizationException ex) {
               //NOP
@@ -717,6 +719,9 @@ public class TestKMS {
           }
         });
 
+        //stop the reloader, to avoid running while we are writing the new file
+        KMSWebApp.getACLs().stopReloader();
+
         // test ACL reloading
         Thread.sleep(10); // to ensure the ACLs file modifiedTime is newer
         conf.set(KMSACLs.Type.CREATE.getConfigKey(), "foo");

+ 9 - 7
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/NfsExports.java

@@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.commons.net.util.SubnetUtils;
 import org.apache.commons.net.util.SubnetUtils.SubnetInfo;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.util.LightWeightCache;
 import org.apache.hadoop.util.LightWeightGSet;
@@ -44,13 +45,14 @@ public class NfsExports {
   
   public static synchronized NfsExports getInstance(Configuration conf) {
     if (exports == null) {
-      String matchHosts = conf.get(Nfs3Constant.EXPORTS_ALLOWED_HOSTS_KEY,
-          Nfs3Constant.EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT);
-      int cacheSize = conf.getInt(Nfs3Constant.EXPORTS_CACHE_SIZE_KEY,
-          Nfs3Constant.EXPORTS_CACHE_SIZE_DEFAULT);
+      String matchHosts = conf.get(
+          CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY,
+          CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT);
+      int cacheSize = conf.getInt(Nfs3Constant.NFS_EXPORTS_CACHE_SIZE_KEY,
+          Nfs3Constant.NFS_EXPORTS_CACHE_SIZE_DEFAULT);
       long expirationPeriodNano = conf.getLong(
-          Nfs3Constant.EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY,
-          Nfs3Constant.EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT) * 1000 * 1000;
+          Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY,
+          Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT) * 1000 * 1000;
       exports = new NfsExports(cacheSize, expirationPeriodNano, matchHosts);
     }
     return exports;
@@ -140,7 +142,7 @@ public class NfsExports {
     accessCache = new LightWeightCache<AccessCacheEntry, AccessCacheEntry>(
         cacheSize, cacheSize, expirationPeriodNano, 0);        
     String[] matchStrings = matchHosts.split(
-        Nfs3Constant.EXPORTS_ALLOWED_HOSTS_SEPARATOR);
+        CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_SEPARATOR);
     mMatches = new ArrayList<Match>(matchStrings.length);
     for(String mStr : matchStrings) {
       if (LOG.isDebugEnabled()) {

+ 7 - 18
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/IdUserGroup.java

@@ -50,9 +50,6 @@ public class IdUserGroup {
   static final String MAC_GET_ALL_USERS_CMD = "dscl . -list /Users UniqueID";
   static final String MAC_GET_ALL_GROUPS_CMD = "dscl . -list /Groups PrimaryGroupID";
 
-  // Used for finding the configured static mapping file.
-  static final String NFS_STATIC_MAPPING_FILE_KEY = "dfs.nfs.static.mapping.file";
-  private static final String NFS_STATIC_MAPPING_FILE_DEFAULT = "/etc/nfs.map";
   private final File staticMappingFile;
 
   // Used for parsing the static mapping file.
@@ -61,11 +58,7 @@ public class IdUserGroup {
   private static final Pattern MAPPING_LINE =
       Pattern.compile("^(uid|gid)\\s+(\\d+)\\s+(\\d+)\\s*(#.*)?$");
 
-  // Do update every 15 minutes by default
-  final static long TIMEOUT_DEFAULT = 15 * 60 * 1000; // ms
-  final static long TIMEOUT_MIN = 1 * 60 * 1000; // ms
   final private long timeout;
-  final static String NFS_USERUPDATE_MILLY = "hadoop.nfs.userupdate.milly";
   
   // Maps for id to name map. Guarded by this object monitor lock
   private BiMap<Integer, String> uidNameMap = HashBiMap.create();
@@ -73,25 +66,21 @@ public class IdUserGroup {
 
   private long lastUpdateTime = 0; // Last time maps were updated
   
-  public IdUserGroup() throws IOException {
-    timeout = TIMEOUT_DEFAULT;
-    staticMappingFile = new File(NFS_STATIC_MAPPING_FILE_DEFAULT);
-    updateMaps();
-  }
-  
   public IdUserGroup(Configuration conf) throws IOException {
-    long updateTime = conf.getLong(NFS_USERUPDATE_MILLY, TIMEOUT_DEFAULT);
+    long updateTime = conf.getLong(
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY,
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT);
     // Minimal interval is 1 minute
-    if (updateTime < TIMEOUT_MIN) {
+    if (updateTime < Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_MIN) {
       LOG.info("User configured user account update time is less"
           + " than 1 minute. Use 1 minute instead.");
-      timeout = TIMEOUT_MIN;
+      timeout = Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_MIN;
     } else {
       timeout = updateTime;
     }
     
-    String staticFilePath = conf.get(NFS_STATIC_MAPPING_FILE_KEY,
-        NFS_STATIC_MAPPING_FILE_DEFAULT);
+    String staticFilePath = conf.get(Nfs3Constant.NFS_STATIC_MAPPING_FILE_KEY,
+        Nfs3Constant.NFS_STATIC_MAPPING_FILE_DEFAULT);
     staticMappingFile = new File(staticFilePath);
     
     updateMaps();

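For reference, the retained MAPPING_LINE pattern ("^(uid|gid)\s+(\d+)\s+(\d+)\s*(#.*)?$") implies a simple whitespace-separated format for the static mapping file. A hypothetical /etc/nfs.map that the pattern would accept (the numeric IDs are invented, and which field is the remote vs. local ID is not visible in this hunk):

    # each line: (uid|gid) <number> <number> [# comment]
    uid 501 1001   # map one uid to another
    gid 501 1001   # same idea for a gid
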
+ 2 - 5
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java

@@ -33,7 +33,6 @@ import org.apache.hadoop.util.ShutdownHookManager;
 public abstract class Nfs3Base {
   public static final Log LOG = LogFactory.getLog(Nfs3Base.class);
   private final RpcProgram rpcProgram;
-  private final int nfsPort;
   private int nfsBoundPort; // Will set after server starts
     
   public RpcProgram getRpcProgram() {
@@ -42,9 +41,7 @@ public abstract class Nfs3Base {
 
   protected Nfs3Base(RpcProgram rpcProgram, Configuration conf) {
     this.rpcProgram = rpcProgram;
-    this.nfsPort = conf.getInt(Nfs3Constant.NFS3_SERVER_PORT,
-        Nfs3Constant.NFS3_SERVER_PORT_DEFAULT);
-    LOG.info("NFS server port set to: " + nfsPort);
+    LOG.info("NFS server port set to: " + rpcProgram.getPort());
   }
 
   public void start(boolean register) {
@@ -58,7 +55,7 @@ public abstract class Nfs3Base {
   }
 
   private void startTCPServer() {
-    SimpleTcpServer tcpServer = new SimpleTcpServer(nfsPort,
+    SimpleTcpServer tcpServer = new SimpleTcpServer(rpcProgram.getPort(),
         rpcProgram, 0);
     rpcProgram.startDaemons();
     tcpServer.run();

+ 12 - 30
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Constant.java

@@ -25,10 +25,6 @@ public class Nfs3Constant {
   // The local rpcbind/portmapper port.
   public final static int SUN_RPCBIND = 111;
 
-  // The IP port number for NFS.
-  public final static String NFS3_SERVER_PORT = "nfs3.server.port";
-  public final static int NFS3_SERVER_PORT_DEFAULT = 2049;
-
   // The RPC program number for NFS.
   public final static int PROGRAM = 100003;
 
@@ -191,36 +187,22 @@ public class Nfs3Constant {
   public final static int CREATE_GUARDED = 1;
   public final static int CREATE_EXCLUSIVE = 2;
   
-  public static final String EXPORTS_ALLOWED_HOSTS_SEPARATOR = ";";
-  /** Allowed hosts for nfs exports */
-  public static final String EXPORTS_ALLOWED_HOSTS_KEY = "dfs.nfs.exports.allowed.hosts";
-  public static final String EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT = "* rw";
   /** Size for nfs exports cache */
-  public static final String EXPORTS_CACHE_SIZE_KEY = "dfs.nfs.exports.cache.size";
-  public static final int EXPORTS_CACHE_SIZE_DEFAULT = 512;
+  public static final String NFS_EXPORTS_CACHE_SIZE_KEY = "nfs.exports.cache.size";
+  public static final int NFS_EXPORTS_CACHE_SIZE_DEFAULT = 512;
   /** Expiration time for nfs exports cache entry */
-  public static final String EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY = "dfs.nfs.exports.cache.expirytime.millis";
-  public static final long EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT = 15 * 60 * 1000; // 15 min
-
-  public static final String FILE_DUMP_DIR_KEY = "dfs.nfs3.dump.dir";
-  public static final String FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
-  public static final String ENABLE_FILE_DUMP_KEY = "dfs.nfs3.enableDump";
-  public static final boolean ENABLE_FILE_DUMP_DEFAULT = true;
-  public static final String MAX_READ_TRANSFER_SIZE_KEY = "dfs.nfs.rtmax";
-  public static final int MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
-  public static final String MAX_WRITE_TRANSFER_SIZE_KEY = "dfs.nfs.wtmax";
-  public static final int MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
-  public static final String MAX_READDIR_TRANSFER_SIZE_KEY = "dfs.nfs.dtmax";
-  public static final int MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024;
-  public static final String MAX_OPEN_FILES = "dfs.nfs3.max.open.files";
-  public static final int MAX_OPEN_FILES_DEFAULT = 256;
-  public static final String OUTPUT_STREAM_TIMEOUT = "dfs.nfs3.stream.timeout";
-  public static final long OUTPUT_STREAM_TIMEOUT_DEFAULT = 10 * 60 * 1000; // 10 minutes
-  public static final long OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT = 10 * 1000; //10 seconds
+  public static final String NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY = "nfs.exports.cache.expirytime.millis";
+  public static final long NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT = 15 * 60 * 1000; // 15 min
+
+  /** Do user/group update every 15 minutes by default, minimum 1 minute */
+  public final static String NFS_USERGROUP_UPDATE_MILLIS_KEY = "nfs.usergroup.update.millis";
+  public final static long NFS_USERGROUP_UPDATE_MILLIS_DEFAULT = 15 * 60 * 1000; // ms
+  final static long NFS_USERGROUP_UPDATE_MILLIS_MIN = 1 * 60 * 1000; // ms
   
   public final static String UNKNOWN_USER = "nobody";
   public final static String UNKNOWN_GROUP = "nobody";
   
-  public final static String EXPORT_POINT = "dfs.nfs3.export.point";
-  public final static String EXPORT_POINT_DEFAULT = "/";
+  // Used for finding the configured static mapping file.
+  public static final String NFS_STATIC_MAPPING_FILE_KEY = "nfs.static.mapping.file";
+  public static final String NFS_STATIC_MAPPING_FILE_DEFAULT = "/etc/nfs.map";
 }

+ 3 - 6
hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/TestNfsExports.java

@@ -17,11 +17,8 @@
  */
 package org.apache.hadoop.nfs;
 
-import org.junit.Assert;
-
-import org.apache.hadoop.nfs.AccessPrivilege;
-import org.apache.hadoop.nfs.NfsExports;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class TestNfsExports {
@@ -32,9 +29,9 @@ public class TestNfsExports {
   private final String hostname2 = "a.b.org";
   
   private static final long ExpirationPeriod = 
-      Nfs3Constant.EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT * 1000 * 1000;
+      Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_DEFAULT * 1000 * 1000;
   
-  private static final int CacheSize = Nfs3Constant.EXPORTS_CACHE_SIZE_DEFAULT;
+  private static final int CacheSize = Nfs3Constant.NFS_EXPORTS_CACHE_SIZE_DEFAULT;
   private static final long NanosPerMillis = 1000000;
 
   @Test

+ 9 - 7
hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/nfs3/TestIdUserGroup.java

@@ -199,17 +199,19 @@ public class TestIdUserGroup {
 
   @Test
   public void testUserUpdateSetting() throws IOException {
-    IdUserGroup iug = new IdUserGroup();
-    assertEquals(iug.getTimeout(), IdUserGroup.TIMEOUT_DEFAULT);
+    IdUserGroup iug = new IdUserGroup(new Configuration());
+    assertEquals(iug.getTimeout(),
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT);
 
     Configuration conf = new Configuration();
-    conf.setLong(IdUserGroup.NFS_USERUPDATE_MILLY, 0);
+    conf.setLong(Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY, 0);
     iug = new IdUserGroup(conf);
-    assertEquals(iug.getTimeout(), IdUserGroup.TIMEOUT_MIN);
+    assertEquals(iug.getTimeout(), Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_MIN);
 
-    conf.setLong(IdUserGroup.NFS_USERUPDATE_MILLY,
-        IdUserGroup.TIMEOUT_DEFAULT * 2);
+    conf.setLong(Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY,
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT * 2);
     iug = new IdUserGroup(conf);
-    assertEquals(iug.getTimeout(), IdUserGroup.TIMEOUT_DEFAULT * 2);
+    assertEquals(iug.getTimeout(),
+        Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_DEFAULT * 2);
   }
 }

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/service/hadoop/FileSystemAccessService.java

@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.lib.server.BaseService;
 import org.apache.hadoop.lib.server.ServiceException;
 import org.apache.hadoop.lib.service.FileSystemAccess;
@@ -395,6 +396,10 @@ public class FileSystemAccessService extends BaseService implements FileSystemAc
     Configuration conf = new Configuration(true);
     ConfigurationUtils.copy(serviceHadoopConf, conf);
     conf.setBoolean(FILE_SYSTEM_SERVICE_CREATED, true);
+
+    // Force-clear server-side umask to make HttpFS match WebHDFS behavior
+    conf.set(FsPermission.UMASK_LABEL, "000");
+
     return conf;
   }
 

+ 99 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/fs/http/server/TestHttpFSServer.java

@@ -231,6 +231,105 @@ public class TestHttpFSServer extends HFSTestCase {
     reader.close();
   }
 
+  /**
+   * Talks to the http interface to create a file.
+   *
+   * @param filename The file to create
+   * @param perms The permission field, if any (may be null)
+   * @throws Exception
+   */
+  private void createWithHttp ( String filename, String perms )
+          throws Exception {
+    String user = HadoopUsersConfTestHelper.getHadoopUsers()[0];
+    String pathOps;
+    if ( perms == null ) {
+      pathOps = MessageFormat.format(
+              "/webhdfs/v1/{0}?user.name={1}&op=CREATE",
+              filename, user);
+    } else {
+      pathOps = MessageFormat.format(
+              "/webhdfs/v1/{0}?user.name={1}&permission={2}&op=CREATE",
+              filename, user, perms);
+    }
+    URL url = new URL(TestJettyHelper.getJettyURL(), pathOps);
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    conn.addRequestProperty("Content-Type", "application/octet-stream");
+    conn.setRequestMethod("PUT");
+    conn.connect();
+    Assert.assertEquals(HttpURLConnection.HTTP_CREATED, conn.getResponseCode());
+  }
+
+  /**
+   * Talks to the http interface to get the json output of the GETFILESTATUS
+   * command on the given file.
+   *
+   * @param filename The file to query.
+   * @return A string containing the JSON output describing the file.
+   * @throws Exception
+   */
+  private String getFileStatus ( String filename ) throws Exception {
+    String user = HadoopUsersConfTestHelper.getHadoopUsers()[0];
+    String pathOps = MessageFormat.format(
+            "/webhdfs/v1/{0}?user.name={1}&op=GETFILESTATUS",
+            filename, user);
+    URL url = new URL(TestJettyHelper.getJettyURL(), pathOps);
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    conn.connect();
+    Assert.assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
+
+    BufferedReader reader =
+            new BufferedReader(new InputStreamReader(conn.getInputStream()));
+
+    return reader.readLine();
+  }
+
+  /**
+   * Given the JSON output from the GETFILESTATUS call, return the
+   * 'permission' value.
+   *
+   * @param statusJson JSON from GETFILESTATUS
+   * @return The value of 'permission' in statusJson
+   * @throws Exception
+   */
+  private String getPerms ( String statusJson ) throws Exception {
+    JSONParser parser = new JSONParser();
+    JSONObject jsonObject = (JSONObject) parser.parse(statusJson);
+    JSONObject details = (JSONObject) jsonObject.get("FileStatus");
+    return (String) details.get("permission");
+  }
+
+  /**
+   * Validate that files are created with 755 permissions when no
+   * 'permissions' attribute is specified, and when 'permissions'
+   * is specified, that value is honored.
+   */
+  @Test
+  @TestDir
+  @TestJetty
+  @TestHdfs
+  public void testPerms() throws Exception {
+    createHttpFSServer(false);
+
+    FileSystem fs = FileSystem.get(TestHdfsHelper.getHdfsConf());
+    fs.mkdirs(new Path("/perm"));
+
+    createWithHttp("/perm/none", null);
+    String statusJson = getFileStatus("/perm/none");
+    Assert.assertTrue("755".equals(getPerms(statusJson)));
+
+    createWithHttp("/perm/p-777", "777");
+    statusJson = getFileStatus("/perm/p-777");
+    Assert.assertTrue("777".equals(getPerms(statusJson)));
+
+    createWithHttp("/perm/p-654", "654");
+    statusJson = getFileStatus("/perm/p-654");
+    Assert.assertTrue("654".equals(getPerms(statusJson)));
+
+    createWithHttp("/perm/p-321", "321");
+    statusJson = getFileStatus("/perm/p-321");
+    Assert.assertTrue("321".equals(getPerms(statusJson)));
+  }
+
   @Test
   @TestDir
   @TestJetty

+ 57 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfigKeys.java

@@ -0,0 +1,57 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.hdfs.nfs.conf;
+
+public class NfsConfigKeys {
+
+  // The IP port number for NFS and mountd.
+  public final static String DFS_NFS_SERVER_PORT_KEY = "nfs.server.port";
+  public final static int DFS_NFS_SERVER_PORT_DEFAULT = 2049;
+  public final static String DFS_NFS_MOUNTD_PORT_KEY = "nfs.mountd.port";
+  public final static int DFS_NFS_MOUNTD_PORT_DEFAULT = 4242;
+  
+  public static final String DFS_NFS_FILE_DUMP_KEY = "nfs.file.dump";
+  public static final boolean DFS_NFS_FILE_DUMP_DEFAULT = true;
+  public static final String DFS_NFS_FILE_DUMP_DIR_KEY = "nfs.file.dump.dir";
+  public static final String DFS_NFS_FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
+  
+  public static final String DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY = "nfs.rtmax";
+  public static final int DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
+  public static final String DFS_NFS_MAX_WRITE_TRANSFER_SIZE_KEY = "nfs.wtmax";
+  public static final int DFS_NFS_MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
+  public static final String DFS_NFS_MAX_READDIR_TRANSFER_SIZE_KEY = "nfs.dtmax";
+  public static final int DFS_NFS_MAX_READDIR_TRANSFER_SIZE_DEFAULT = 64 * 1024;
+
+  public static final String DFS_NFS_MAX_OPEN_FILES_KEY = "nfs.max.open.files";
+  public static final int DFS_NFS_MAX_OPEN_FILES_DEFAULT = 256;
+
+  public static final String DFS_NFS_STREAM_TIMEOUT_KEY = "nfs.stream.timeout";
+  public static final long DFS_NFS_STREAM_TIMEOUT_DEFAULT = 10 * 60 * 1000; // 10 minutes
+  public static final long DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT = 10 * 1000; // 10 seconds
+
+  public final static String DFS_NFS_EXPORT_POINT_KEY = "nfs.export.point";
+  public final static String DFS_NFS_EXPORT_POINT_DEFAULT = "/";
+  
+  public static final String DFS_NFS_KEYTAB_FILE_KEY = "nfs.keytab.file";
+  public static final String DFS_NFS_KERBEROS_PRINCIPAL_KEY = "nfs.kerberos.principal";
+  public static final String DFS_NFS_REGISTRATION_PORT_KEY = "nfs.registration.port";
+  public static final int    DFS_NFS_REGISTRATION_PORT_DEFAULT = 40; // Currently unassigned.
+  public static final String  DFS_NFS_ALLOW_INSECURE_PORTS_KEY = "nfs.allow.insecure.ports";
+  public static final boolean DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT = true;
+}

+ 54 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/conf/NfsConfiguration.java

@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.nfs.conf;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configuration.DeprecationDelta;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
+
+/**
+ * Adds deprecated keys into the configuration.
+ */
+public class NfsConfiguration extends HdfsConfiguration {
+  static {
+    addDeprecatedKeys();
+  }
+
+  private static void addDeprecatedKeys() {
+    Configuration.addDeprecations(new DeprecationDelta[] {
+        new DeprecationDelta("nfs3.server.port",
+            NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY),
+        new DeprecationDelta("nfs3.mountd.port",
+            NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY),
+        new DeprecationDelta("dfs.nfs.exports.cache.expirytime.millis",
+            Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY),
+        new DeprecationDelta("hadoop.nfs.userupdate.milly",
+            Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY),
+        new DeprecationDelta("dfs.nfs3.enableDump",
+            NfsConfigKeys.DFS_NFS_FILE_DUMP_KEY),
+        new DeprecationDelta("dfs.nfs3.dump.dir",
+            NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY),
+        new DeprecationDelta("dfs.nfs3.max.open.files",
+            NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY),
+        new DeprecationDelta("dfs.nfs3.stream.timeout",
+            NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY),
+        new DeprecationDelta("dfs.nfs3.export.point",
+            NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY) });
+  }
+}

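As a usage sketch of the deprecation mapping above (the port value is arbitrary), a configuration that still sets an old key transparently feeds the new one:

    // Sketch only: Configuration's deprecation support resolves the old name.
    NfsConfiguration conf = new NfsConfiguration();
    conf.set("nfs3.mountd.port", "4243");            // deprecated key from earlier releases
    int mountdPort = conf.getInt(NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY,
        NfsConfigKeys.DFS_NFS_MOUNTD_PORT_DEFAULT);  // returns 4243 via the mapping
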
+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/Mountd.java

@@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.nfs.mount;
 import java.io.IOException;
 import java.net.DatagramSocket;
 
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.mount.MountdBase;
 
 /**
@@ -32,13 +32,13 @@ import org.apache.hadoop.mount.MountdBase;
  */
 public class Mountd extends MountdBase {
 
-  public Mountd(Configuration config, DatagramSocket registrationSocket,
+  public Mountd(NfsConfiguration config, DatagramSocket registrationSocket,
       boolean allowInsecurePorts) throws IOException {
     super(new RpcProgramMountd(config, registrationSocket, allowInsecurePorts));
   }
   
   public static void main(String[] args) throws IOException {
-    Configuration config = new Configuration();
+    NfsConfiguration config = new NfsConfiguration();
     Mountd mountd = new Mountd(config, null, true);
     mountd.start(true);
   }

+ 13 - 15
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/mount/RpcProgramMountd.java

@@ -16,9 +16,6 @@
  * limitations under the License.
  */
 package org.apache.hadoop.hdfs.nfs.mount;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KEYTAB_FILE_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY;
-
 import java.io.IOException;
 import java.net.DatagramSocket;
 import java.net.InetAddress;
@@ -29,8 +26,9 @@ import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.mount.MountEntry;
@@ -39,7 +37,6 @@ import org.apache.hadoop.mount.MountResponse;
 import org.apache.hadoop.nfs.AccessPrivilege;
 import org.apache.hadoop.nfs.NfsExports;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.nfs.nfs3.Nfs3Status;
 import org.apache.hadoop.oncrpc.RpcAcceptedReply;
 import org.apache.hadoop.oncrpc.RpcCall;
@@ -66,9 +63,7 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface {
   public static final int VERSION_1 = 1;
   public static final int VERSION_2 = 2;
   public static final int VERSION_3 = 3;
-  public static final int PORT = 4242;
 
-  // Need DFSClient for branch-1 to get ExtendedHdfsFileStatus
   private final DFSClient dfsClient;
   
   /** Synchronized list */
@@ -79,19 +74,22 @@ public class RpcProgramMountd extends RpcProgram implements MountInterface {
   
   private final NfsExports hostsMatcher;
 
-  public RpcProgramMountd(Configuration config, DatagramSocket registrationSocket,
-      boolean allowInsecurePorts) throws IOException {
+  public RpcProgramMountd(NfsConfiguration config,
+      DatagramSocket registrationSocket, boolean allowInsecurePorts)
+      throws IOException {
     // Note that RPC cache is not enabled
-    super("mountd", "localhost", config.getInt("nfs3.mountd.port", PORT),
-        PROGRAM, VERSION_1, VERSION_3, registrationSocket, allowInsecurePorts);
+    super("mountd", "localhost", config.getInt(
+        NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_MOUNTD_PORT_DEFAULT), PROGRAM, VERSION_1,
+        VERSION_3, registrationSocket, allowInsecurePorts);
     exports = new ArrayList<String>();
-    exports.add(config.get(Nfs3Constant.EXPORT_POINT,
-        Nfs3Constant.EXPORT_POINT_DEFAULT));
+    exports.add(config.get(NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY,
+        NfsConfigKeys.DFS_NFS_EXPORT_POINT_DEFAULT));
     this.hostsMatcher = NfsExports.getInstance(config);
     this.mounts = Collections.synchronizedList(new ArrayList<MountEntry>());
     UserGroupInformation.setConfiguration(config);
-    SecurityUtil.login(config, DFS_NFS_KEYTAB_FILE_KEY,
-            DFS_NFS_KERBEROS_PRINCIPAL_KEY);
+    SecurityUtil.login(config, NfsConfigKeys.DFS_NFS_KEYTAB_FILE_KEY,
+        NfsConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY);
     this.dfsClient = new DFSClient(NameNode.getAddress(config), config);
   }
   

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java

@@ -30,10 +30,10 @@ import java.util.concurrent.TimeUnit;
 
 import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSInputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.MultipleIOException;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -72,7 +72,7 @@ class DFSClientCache {
   final static int DEFAULT_DFS_INPUTSTREAM_CACHE_SIZE = 1024;
   final static int DEFAULT_DFS_INPUTSTREAM_CACHE_TTL = 10 * 60;
 
-  private final Configuration config;
+  private final NfsConfiguration config;
 
   private static class DFSInputStreamCaheKey {
     final String userId;
@@ -99,11 +99,11 @@ class DFSClientCache {
     }
   }
 
-  DFSClientCache(Configuration config) {
+  DFSClientCache(NfsConfiguration config) {
     this(config, DEFAULT_DFS_CLIENT_CACHE_SIZE);
   }
   
-  DFSClientCache(Configuration config, int clientCache) {
+  DFSClientCache(NfsConfiguration config, int clientCache) {
     this.config = config;
     this.clientCache = CacheBuilder.newBuilder()
         .maximumSize(clientCache)

+ 8 - 13
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/Nfs3.java

@@ -20,8 +20,8 @@ package org.apache.hadoop.hdfs.nfs.nfs3;
 import java.io.IOException;
 import java.net.DatagramSocket;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.mount.Mountd;
 import org.apache.hadoop.nfs.nfs3.Nfs3Base;
 import org.apache.hadoop.util.StringUtils;
@@ -36,16 +36,11 @@ import com.google.common.annotations.VisibleForTesting;
 public class Nfs3 extends Nfs3Base {
   private Mountd mountd;
   
-  static {
-    Configuration.addDefaultResource("hdfs-default.xml");
-    Configuration.addDefaultResource("hdfs-site.xml");
-  }
-  
-  public Nfs3(Configuration conf) throws IOException {
+  public Nfs3(NfsConfiguration conf) throws IOException {
     this(conf, null, true);
   }
   
-  public Nfs3(Configuration conf, DatagramSocket registrationSocket,
+  public Nfs3(NfsConfiguration conf, DatagramSocket registrationSocket,
       boolean allowInsecurePorts) throws IOException {
     super(new RpcProgramNfs3(conf, registrationSocket, allowInsecurePorts), conf);
     mountd = new Mountd(conf, registrationSocket, allowInsecurePorts);
@@ -64,11 +59,11 @@ public class Nfs3 extends Nfs3Base {
   static void startService(String[] args,
       DatagramSocket registrationSocket) throws IOException {
     StringUtils.startupShutdownMessage(Nfs3.class, args, LOG);
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     boolean allowInsecurePorts = conf.getBoolean(
-        DFSConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_KEY,
-        DFSConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT);
-    final Nfs3 nfsServer = new Nfs3(new Configuration(), registrationSocket,
+        NfsConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_KEY,
+        NfsConfigKeys.DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT);
+    final Nfs3 nfsServer = new Nfs3(conf, registrationSocket,
         allowInsecurePorts);
     nfsServer.startServiceInternal(true);
   }
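The NfsConfigKeys constants used throughout this change live in the new hadoop-hdfs-nfs conf package; the file itself is not reproduced in this diff. The following is a hedged excerpt of what it plausibly contains, with key names and defaults taken from the nfs.* properties added to hdfs-default.xml later in this commit — treat every constant here as an assumption, not the definitive file contents:

```java
package org.apache.hadoop.hdfs.nfs.conf;

// Hypothetical excerpt of NfsConfigKeys. Values mirror the nfs.* entries
// documented in hdfs-default.xml in this same commit.
public class NfsConfigKeys {
  public static final String DFS_NFS_SERVER_PORT_KEY = "nfs.server.port";
  public static final int DFS_NFS_SERVER_PORT_DEFAULT = 2049;
  public static final String DFS_NFS_MOUNTD_PORT_KEY = "nfs.mountd.port";
  public static final int DFS_NFS_MOUNTD_PORT_DEFAULT = 4242;
  public static final String DFS_NFS_FILE_DUMP_DIR_KEY = "nfs.dump.dir";
  public static final String DFS_NFS_FILE_DUMP_DIR_DEFAULT = "/tmp/.hdfs-nfs";
  public static final String DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY = "nfs.rtmax";
  public static final int DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
  public static final String DFS_NFS_MAX_WRITE_TRANSFER_SIZE_KEY = "nfs.wtmax";
  public static final int DFS_NFS_MAX_WRITE_TRANSFER_SIZE_DEFAULT = 1024 * 1024;
  public static final String DFS_NFS_KEYTAB_FILE_KEY = "nfs.keytab.file";
  public static final String DFS_NFS_KERBEROS_PRINCIPAL_KEY = "nfs.kerberos.principal";
  public static final String DFS_NFS_ALLOW_INSECURE_PORTS_KEY = "nfs.allow.insecure.ports";
  public static final boolean DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT = true;
}
```

Note also that the static block removed from Nfs3 above (which registered hdfs-default.xml and hdfs-site.xml as default resources) presumably moves into NfsConfiguration, which is why startService can drop the plain Configuration.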

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
 import org.apache.hadoop.hdfs.nfs.nfs3.WriteCtx.DataState;
 import org.apache.hadoop.io.BytesWritable.Comparator;
 import org.apache.hadoop.io.IOUtils;
@@ -823,7 +824,7 @@
    */
   public synchronized boolean streamCleanup(long fileId, long streamTimeout) {
     Preconditions
-        .checkState(streamTimeout >= Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+        .checkState(streamTimeout >= NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     if (!activeState) {
       return true;
     }

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtxCache.java

@@ -24,9 +24,9 @@ import java.util.concurrent.ConcurrentMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.Time;
 
@@ -48,9 +48,9 @@ class OpenFileCtxCache {
   private final long streamTimeout;
   private final StreamMonitor streamMonitor;
 
-  OpenFileCtxCache(Configuration config, long streamTimeout) {
-    maxStreams = config.getInt(Nfs3Constant.MAX_OPEN_FILES,
-        Nfs3Constant.MAX_OPEN_FILES_DEFAULT);
+  OpenFileCtxCache(NfsConfiguration config, long streamTimeout) {
+    maxStreams = config.getInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY,
+        NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_DEFAULT);
     LOG.info("Maximum open streams is " + maxStreams);
     this.streamTimeout = streamTimeout;
     streamMonitor = new StreamMonitor();
@@ -102,7 +102,7 @@
     } else {
       long idleTime = Time.monotonicNow()
           - idlest.getValue().getLastAccessTime();
-      if (idleTime < Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT) {
+      if (idleTime < NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("idlest stream's idle time:" + idleTime);
         }

+ 6 - 7
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/PrivilegedNfsGatewayStarter.java

@@ -21,9 +21,8 @@ import java.net.InetSocketAddress;
 
 import org.apache.commons.daemon.Daemon;
 import org.apache.commons.daemon.DaemonContext;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 
 /**
  * This class is used to allow the initial registration of the NFS gateway with
@@ -42,12 +41,12 @@ public class PrivilegedNfsGatewayStarter implements Daemon {
   @Override
   public void init(DaemonContext context) throws Exception {
     System.err.println("Initializing privileged NFS client socket...");
-    Configuration conf = new HdfsConfiguration();
-    int clientPort = conf.getInt(DFSConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY,
-        DFSConfigKeys.DFS_NFS_REGISTRATION_PORT_DEFAULT);
+    NfsConfiguration conf = new NfsConfiguration();
+    int clientPort = conf.getInt(NfsConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_REGISTRATION_PORT_DEFAULT);
     if (clientPort < 1 || clientPort > 1023) {
       throw new RuntimeException("Must start privileged NFS server with '" +
-          DFSConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY + "' configured to a " +
+          NfsConfigKeys.DFS_NFS_REGISTRATION_PORT_KEY + "' configured to a " +
           "privileged port.");
     }
     registrationSocket = new DatagramSocket(

+ 28 - 25
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java

@@ -28,7 +28,6 @@ import java.util.EnumSet;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException;
@@ -41,6 +40,8 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -131,9 +132,6 @@ import org.jboss.netty.channel.ChannelHandlerContext;
 
 import com.google.common.annotations.VisibleForTesting;
 
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KEYTAB_FILE_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY;
-
 /**
  * RPC program corresponding to nfs daemon. See {@link Nfs3}.
  */
@@ -144,7 +142,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
   
   static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class);
 
-  private final Configuration config = new Configuration();
+  private final NfsConfiguration config;
   private final WriteManager writeManager;
   private final IdUserGroup iug;
   private final DFSClientCache clientCache;
@@ -159,15 +157,17 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
   
   private final RpcCallCache rpcCallCache;
 
-  public RpcProgramNfs3(Configuration config, DatagramSocket registrationSocket,
+  public RpcProgramNfs3(NfsConfiguration config, DatagramSocket registrationSocket,
       boolean allowInsecurePorts) throws IOException {
-    super("NFS3", "localhost", config.getInt(Nfs3Constant.NFS3_SERVER_PORT,
-        Nfs3Constant.NFS3_SERVER_PORT_DEFAULT), Nfs3Constant.PROGRAM,
+    super("NFS3", "localhost", config.getInt(
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_DEFAULT), Nfs3Constant.PROGRAM,
         Nfs3Constant.VERSION, Nfs3Constant.VERSION, registrationSocket,
         allowInsecurePorts);
    
+    this.config = config;
     config.set(FsPermission.UMASK_LABEL, "000");
-    iug = new IdUserGroup();
+    iug = new IdUserGroup(config);
     
     exports = NfsExports.getInstance(config);
     writeManager = new WriteManager(iug, config);
@@ -180,13 +180,13 @@
         CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY,
         CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT);
     
-    writeDumpDir = config.get(Nfs3Constant.FILE_DUMP_DIR_KEY,
-        Nfs3Constant.FILE_DUMP_DIR_DEFAULT);
-    boolean enableDump = config.getBoolean(Nfs3Constant.ENABLE_FILE_DUMP_KEY,
-        Nfs3Constant.ENABLE_FILE_DUMP_DEFAULT);
+    writeDumpDir = config.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY,
+        NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_DEFAULT);
+    boolean enableDump = config.getBoolean(NfsConfigKeys.DFS_NFS_FILE_DUMP_KEY,
+        NfsConfigKeys.DFS_NFS_FILE_DUMP_DEFAULT);
     UserGroupInformation.setConfiguration(config);
-    SecurityUtil.login(config, DFS_NFS_KEYTAB_FILE_KEY,
-            DFS_NFS_KERBEROS_PRINCIPAL_KEY);
+    SecurityUtil.login(config, NfsConfigKeys.DFS_NFS_KEYTAB_FILE_KEY,
+        NfsConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY);
 
     if (!enableDump) {
       writeDumpDir = null;
@@ -567,8 +567,8 @@
             + handle.getFileId());
         return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT);
       }
-      int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+      int rtmax = config.getInt(NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT);
       if (rtmax < target.getBytes().length) {
         LOG.error("Link size: " + target.getBytes().length
             + " is larger than max transfer size: " + rtmax);
@@ -665,8 +665,8 @@
     }
 
     try {
-      int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
+      int rtmax = config.getInt(NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT);
       int buffSize = Math.min(rtmax, count);
       byte[] readbuffer = new byte[buffSize];
 
@@ -1740,12 +1740,15 @@
     }
 
     try {
-      int rtmax = config.getInt(Nfs3Constant.MAX_READ_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READ_TRANSFER_SIZE_DEFAULT);
-      int wtmax = config.getInt(Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_WRITE_TRANSFER_SIZE_DEFAULT);
-      int dtperf = config.getInt(Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_KEY,
-              Nfs3Constant.MAX_READDIR_TRANSFER_SIZE_DEFAULT);
+      int rtmax = config.getInt(
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READ_TRANSFER_SIZE_DEFAULT);
+      int wtmax = config.getInt(
+          NfsConfigKeys.DFS_NFS_MAX_WRITE_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_WRITE_TRANSFER_SIZE_DEFAULT);
+      int dtperf = config.getInt(
+          NfsConfigKeys.DFS_NFS_MAX_READDIR_TRANSFER_SIZE_KEY,
+          NfsConfigKeys.DFS_NFS_MAX_READDIR_TRANSFER_SIZE_DEFAULT);
 
       Nfs3FileAttributes attrs = Nfs3Utils.getFileAttr(dfsClient,
           Nfs3Utils.getFileIdPath(handle), iug);
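As a concrete illustration of the READ path above: the response buffer is capped at rtmax, so with the default nfs.rtmax of 1048576 bytes a READ request for 2 MB is served at most 1 MB at a time. A minimal sketch of just that clamping arithmetic (standalone, not the surrounding RPC handling):

```java
public class TransferClampSketch {
  public static void main(String[] args) {
    int rtmax = 1048576;          // nfs.rtmax default, per hdfs-default.xml
    int count = 2 * 1024 * 1024;  // the client asked for 2 MB
    int buffSize = Math.min(rtmax, count); // same clamp as in RpcProgramNfs3
    System.out.println(buffSize); // prints 1048576
  }
}
```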

+ 13 - 12
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteManager.java

@@ -21,10 +21,11 @@ import java.io.IOException;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.COMMIT_STATUS;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.ipc.RemoteException;
@@ -50,7 +51,7 @@ import com.google.common.annotations.VisibleForTesting;
 public class WriteManager {
   public static final Log LOG = LogFactory.getLog(WriteManager.class);
 
-  private final Configuration config;
+  private final NfsConfiguration config;
   private final IdUserGroup iug;
  
   private AsyncDataService asyncDataService;
@@ -78,19 +79,19 @@
     return fileContextCache.put(h, ctx);
   }
   
-  WriteManager(IdUserGroup iug, final Configuration config) {
+  WriteManager(IdUserGroup iug, final NfsConfiguration config) {
     this.iug = iug;
     this.config = config;
-    streamTimeout = config.getLong(Nfs3Constant.OUTPUT_STREAM_TIMEOUT,
-        Nfs3Constant.OUTPUT_STREAM_TIMEOUT_DEFAULT);
+    streamTimeout = config.getLong(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY,
+        NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_DEFAULT);
     LOG.info("Stream timeout is " + streamTimeout + "ms.");
-    if (streamTimeout < Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT) {
+    if (streamTimeout < NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT) {
       LOG.info("Reset stream timeout to minimum value "
-          + Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT + "ms.");
-      streamTimeout = Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT;
+          + NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT + "ms.");
+      streamTimeout = NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT;
     }
-    maxStreams = config.getInt(Nfs3Constant.MAX_OPEN_FILES,
-        Nfs3Constant.MAX_OPEN_FILES_DEFAULT);
+    maxStreams = config.getInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY,
+        NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_DEFAULT);
     LOG.info("Maximum open streams is "+ maxStreams);
     this.fileContextCache = new OpenFileCtxCache(config, streamTimeout);
   }
@@ -171,8 +172,8 @@
       }
 
       // Add open stream
-      String writeDumpDir = config.get(Nfs3Constant.FILE_DUMP_DIR_KEY,
-          Nfs3Constant.FILE_DUMP_DIR_DEFAULT);
+      String writeDumpDir = config.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY,
+          NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_DEFAULT);
       openFileCtx = new OpenFileCtx(fos, latestAttr, writeDumpDir + "/"
           + fileHandle.getFileId(), dfsClient, iug);
 

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestMountd.java

@@ -23,8 +23,8 @@ import java.net.InetAddress;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.mount.RpcProgramMountd;
 import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3;
 import org.apache.hadoop.hdfs.nfs.nfs3.RpcProgramNfs3;
@@ -38,7 +38,7 @@ public class TestMountd {
   @Test
   public void testStart() throws IOException {
     // Start minicluster
-    Configuration config = new Configuration();
+    NfsConfiguration config = new NfsConfiguration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(config).numDataNodes(1)
         .build();
     cluster.waitActive();

+ 5 - 4
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestOutOfOrderWrite.java

@@ -23,7 +23,8 @@ import java.util.Arrays;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3Utils;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
@@ -155,10 +156,10 @@ public class TestOutOfOrderWrite {
     Arrays.fill(data3, (byte) 9);
 
     // NFS3 Create request
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     WriteClient client = new WriteClient("localhost", conf.getInt(
-        Nfs3Constant.NFS3_SERVER_PORT, Nfs3Constant.NFS3_SERVER_PORT_DEFAULT),
-        create(), false);
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY,
+        NfsConfigKeys.DFS_NFS_SERVER_PORT_DEFAULT), create(), false);
     client.run();
 
     while (handle == null) {

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java

@@ -24,11 +24,11 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.util.List;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.Nfs3;
 import org.apache.hadoop.hdfs.nfs.nfs3.RpcProgramNfs3;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -53,7 +53,7 @@ import org.mockito.Mockito;
  */
 public class TestReaddir {
 
-  static Configuration config = new Configuration();
+  static NfsConfiguration config = new NfsConfiguration();
   static MiniDFSCluster cluster = null;
   static DistributedFileSystem hdfs;
   static NameNode nn;

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java

@@ -18,24 +18,24 @@
 package org.apache.hadoop.hdfs.nfs.nfs3;
 
 import static org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertThat;
-import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.junit.Test;
 
 public class TestDFSClientCache {
   @Test
   public void testEviction() throws IOException {
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
 
     // Only one entry will be in the cache
@@ -59,7 +59,7 @@ public class TestDFSClientCache {
     String currentUser = "test-user";
 
 
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     UserGroupInformation currentUserUgi
             = UserGroupInformation.createRemoteUser(currentUser);
     currentUserUgi.setAuthenticationMethod(KERBEROS);
@@ -83,7 +83,7 @@
 
     UserGroupInformation currentUserUgi = UserGroupInformation
             .createUserForTesting(currentUser, new String[0]);
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
     conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
     DFSClientCache cache = new DFSClientCache(conf);
     UserGroupInformation ugiResult

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestExportsTable.java

@@ -21,22 +21,22 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.mount.Mountd;
 import org.apache.hadoop.hdfs.nfs.mount.RpcProgramMountd;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.junit.Test;
 
 public class TestExportsTable {
  
   @Test
   public void testExportPoint() throws IOException {
-    Configuration config = new Configuration();
+    NfsConfiguration config = new NfsConfiguration();
     MiniDFSCluster cluster = null;
 
     String exportPoint = "/myexport1";
-    config.setStrings(Nfs3Constant.EXPORT_POINT, exportPoint);
+    config.setStrings(NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY, exportPoint);
     // Use ephemeral ports in case tests are running in parallel
     config.setInt("nfs3.mountd.port", 0);
     config.setInt("nfs3.server.port", 0);

+ 20 - 20
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestOpenFileCtxCache.java

@@ -22,13 +22,13 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.CommitCtx;
 import org.apache.hadoop.nfs.nfs3.FileHandle;
 import org.apache.hadoop.nfs.nfs3.IdUserGroup;
-import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.apache.hadoop.nfs.nfs3.Nfs3FileAttributes;
 import org.junit.Test;
 import org.mockito.Mockito;
@@ -38,10 +38,10 @@ public class TestOpenFileCtxCache {
 
   @Test
   public void testEviction() throws IOException, InterruptedException {
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
 
     // Only two entries will be in the cache
-    conf.setInt(Nfs3Constant.MAX_OPEN_FILES, 2);
+    conf.setInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY, 2);
 
     DFSClient dfsClient = Mockito.mock(DFSClient.class);
     Nfs3FileAttributes attr = new Nfs3FileAttributes();
@@ -49,15 +49,15 @@
     Mockito.when(fos.getPos()).thenReturn((long) 0);
 
     OpenFileCtx context1 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context2 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context3 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context4 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context5 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
 
     OpenFileCtxCache cache = new OpenFileCtxCache(conf, 10 * 60 * 100);
 
@@ -71,7 +71,7 @@
     assertTrue(cache.size() == 2);
 
     // Wait for the oldest stream to be evict-able, insert again
-    Thread.sleep(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+    Thread.sleep(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     assertTrue(cache.size() == 2);
 
     ret = cache.put(new FileHandle(3), context3);
@@ -90,17 +90,17 @@
         new WriteCtx(null, 0, 0, 0, null, null, null, 0, false, null));
     context4.getPendingCommitsForTest().put(new Long(100),
         new CommitCtx(0, null, 0, attr));
-    Thread.sleep(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+    Thread.sleep(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     ret = cache.put(new FileHandle(5), context5);
     assertFalse(ret);
   }
 
   @Test
   public void testScan() throws IOException, InterruptedException {
-    Configuration conf = new Configuration();
+    NfsConfiguration conf = new NfsConfiguration();
 
     // Only two entries will be in the cache
-    conf.setInt(Nfs3Constant.MAX_OPEN_FILES, 2);
+    conf.setInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY, 2);
 
     DFSClient dfsClient = Mockito.mock(DFSClient.class);
     Nfs3FileAttributes attr = new Nfs3FileAttributes();
@@ -108,13 +108,13 @@
     Mockito.when(fos.getPos()).thenReturn((long) 0);
 
     OpenFileCtx context1 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context2 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context3 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
     OpenFileCtx context4 = new OpenFileCtx(fos, attr, "/dumpFilePath",
-        dfsClient, new IdUserGroup());
+        dfsClient, new IdUserGroup(new NfsConfiguration()));
 
     OpenFileCtxCache cache = new OpenFileCtxCache(conf, 10 * 60 * 100);
 
@@ -123,8 +123,8 @@
     assertTrue(ret);
     ret = cache.put(new FileHandle(2), context2);
     assertTrue(ret);
-    Thread.sleep(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT + 1);
-    cache.scan(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_MIN_DEFAULT);
+    Thread.sleep(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT + 1);
+    cache.scan(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_MIN_DEFAULT);
     assertTrue(cache.size() == 0);
 
     // Test cleaning inactive entry
@@ -133,7 +133,7 @@
     ret = cache.put(new FileHandle(4), context4);
     assertTrue(ret);
     context3.setActiveStatusForTest(false);
-    cache.scan(Nfs3Constant.OUTPUT_STREAM_TIMEOUT_DEFAULT);
+    cache.scan(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_DEFAULT);
     assertTrue(cache.size() == 1);
     assertTrue(cache.get(new FileHandle(3)) == null);
     assertTrue(cache.get(new FileHandle(4)) != null);

+ 42 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java

@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hdfs.nfs.nfs3;
 
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.nfs.nfs3.Nfs3Constant;
 import org.junit.Assert;
 import org.junit.Test;
@@ -63,4 +68,41 @@ public class TestRpcProgramNfs3 {
       }
     }
   }
+
+  @Test
+  public void testDeprecatedKeys() {
+    NfsConfiguration conf = new NfsConfiguration();
+    conf.setInt("nfs3.server.port", 998);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY, 0) == 998);
+
+    conf.setInt("nfs3.mountd.port", 999);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY, 0) == 999);
+
+    conf.set("dfs.nfs.exports.allowed.hosts", "host1");
+    assertTrue(conf.get(CommonConfigurationKeys.NFS_EXPORTS_ALLOWED_HOSTS_KEY)
+        .equals("host1"));
+
+    conf.setInt("dfs.nfs.exports.cache.expirytime.millis", 1000);
+    assertTrue(conf.getInt(
+        Nfs3Constant.NFS_EXPORTS_CACHE_EXPIRYTIME_MILLIS_KEY, 0) == 1000);
+
+    conf.setInt("hadoop.nfs.userupdate.milly", 10);
+    assertTrue(conf.getInt(Nfs3Constant.NFS_USERGROUP_UPDATE_MILLIS_KEY, 0) == 10);
+
+    conf.set("dfs.nfs3.dump.dir", "/nfs/tmp");
+    assertTrue(conf.get(NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY).equals(
+        "/nfs/tmp"));
+
+    conf.setBoolean("dfs.nfs3.enableDump", false);
+    assertTrue(conf.getBoolean(NfsConfigKeys.DFS_NFS_FILE_DUMP_KEY, true) == false);
+
+    conf.setInt("dfs.nfs3.max.open.files", 500);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_MAX_OPEN_FILES_KEY, 0) == 500);
+
+    conf.setInt("dfs.nfs3.stream.timeout", 6000);
+    assertTrue(conf.getInt(NfsConfigKeys.DFS_NFS_STREAM_TIMEOUT_KEY, 0) == 6000);
+
+    conf.set("dfs.nfs3.export.point", "/dir1");
+    assertTrue(conf.get(NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY).equals("/dir1"));
+  }
 }
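The mappings this test exercises are presumably registered by NfsConfiguration itself via Hadoop's standard deprecation framework. A minimal sketch of how that registration could look — the real class is added elsewhere in this change and may differ, so treat the specific deltas below as assumptions:

```java
package org.apache.hadoop.hdfs.nfs.conf;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.DeprecationDelta;

// Hypothetical sketch: map old key names onto the new nfs.* keys once,
// at class-load time, so every NfsConfiguration instance sees them.
public class NfsConfiguration extends Configuration {
  static {
    Configuration.addDeprecations(new DeprecationDelta[] {
        new DeprecationDelta("nfs3.server.port",
            NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY),
        new DeprecationDelta("nfs3.mountd.port",
            NfsConfigKeys.DFS_NFS_MOUNTD_PORT_KEY),
        new DeprecationDelta("dfs.nfs3.dump.dir",
            NfsConfigKeys.DFS_NFS_FILE_DUMP_DIR_KEY),
        new DeprecationDelta("dfs.nfs3.export.point",
            NfsConfigKeys.DFS_NFS_EXPORT_POINT_KEY) });
  }
}
```

With such mappings in place, resolution works in both directions: setting the old name updates the value seen under the new key (as the assertions above check), and reading the old name yields the value stored under the new one.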

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java

@@ -27,11 +27,10 @@ import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.concurrent.ConcurrentNavigableMap;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSClient;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.COMMIT_STATUS;
 import org.apache.hadoop.hdfs.nfs.nfs3.OpenFileCtx.CommitCtx;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -139,7 +138,7 @@ public class TestWrites {
     Mockito.when(fos.getPos()).thenReturn((long) 0);
 
     OpenFileCtx ctx = new OpenFileCtx(fos, attr, "/dumpFilePath", dfsClient,
-        new IdUserGroup());
+        new IdUserGroup(new NfsConfiguration()));
 
     COMMIT_STATUS ret;
 
@@ -201,13 +200,14 @@
     Nfs3FileAttributes attr = new Nfs3FileAttributes();
     HdfsDataOutputStream fos = Mockito.mock(HdfsDataOutputStream.class);
     Mockito.when(fos.getPos()).thenReturn((long) 0);
+    NfsConfiguration config = new NfsConfiguration();
 
     OpenFileCtx ctx = new OpenFileCtx(fos, attr, "/dumpFilePath", dfsClient,
-        new IdUserGroup());
+        new IdUserGroup(config));
 
     FileHandle h = new FileHandle(1); // fake handle for "/dumpFilePath"
     COMMIT_STATUS ret;
-    WriteManager wm = new WriteManager(new IdUserGroup(), new Configuration());
+    WriteManager wm = new WriteManager(new IdUserGroup(config), config);
     assertTrue(wm.addOpenFileStream(h, ctx));
     
     // Test inactive open file context
@@ -280,7 +280,7 @@
 
   @Test
   public void testWriteStableHow() throws IOException, InterruptedException {
-    HdfsConfiguration config = new HdfsConfiguration();
+    NfsConfiguration config = new NfsConfiguration();
     DFSClient client = null;
     MiniDFSCluster cluster = null;
     RpcProgramNfs3 nfsd;

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/resources/core-site.xml

@@ -18,12 +18,12 @@
 
 <configuration>
 <property>
-  <name>nfs3.server.port</name>
+  <name>nfs.server.port</name>
   <value>2079</value>
 </property>
 
 <property>
-  <name>nfs3.mountd.port</name>
+  <name>nfs.mountd.port</name>
   <value>4272</value>
 </property>
 </configuration>

+ 11 - 1
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -452,7 +452,14 @@ Release 2.5.0 - UNRELEASED
     HDFS-6356. Fix typo in DatanodeLayoutVersion. (Tulasi G via wang)
 
     HDFS-6447. balancer should timestamp the completion message.
-    (Juan Yu via wang).
+    (Juan Yu via wang)
+
+    HDFS-6463. Clarify behavior of AclStorage#createFsPermissionForExtendedAcl
+    in comments. (cnauroth)
+
+    HDFS-6472. fix typo in webapps/hdfs/explorer.js. (Juan Yu via wang)
+
+    HDFS-6056. Clean up NFS config settings (brandonli)
 
   OPTIMIZATIONS
 
@@ -617,6 +624,9 @@ Release 2.5.0 - UNRELEASED
 
     HDFS-6462. NFS: fsstat request fails with the secure hdfs (brandonli)
 
+    HDFS-6404. HttpFS should use a 000 umask for mkdir and create
+    operations. (yoderme via tucu)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 0 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -639,12 +639,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE =
       "dfs.client.hedged.read.threadpool.size";
   public static final int     DEFAULT_DFSCLIENT_HEDGED_READ_THREADPOOL_SIZE = 0;
-  public static final String  DFS_NFS_KEYTAB_FILE_KEY = "dfs.nfs.keytab.file";
-  public static final String  DFS_NFS_KERBEROS_PRINCIPAL_KEY = "dfs.nfs.kerberos.principal";
-  public static final String  DFS_NFS_REGISTRATION_PORT_KEY = "dfs.nfs.registration.port";
-  public static final int     DFS_NFS_REGISTRATION_PORT_DEFAULT = 40; // Currently unassigned.
-  public static final String  DFS_NFS_ALLOW_INSECURE_PORTS_KEY = "dfs.nfs.allow.insecure.ports";
-  public static final boolean DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT = true;
 
    // Slow io warning log threshold settings for dfsclient and datanode.
    public static final String DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY =

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AclStorage.java

@@ -338,6 +338,10 @@ final class AclStorage {
    * ACL, based on its access ACL entries.  For a correctly sorted ACL, the
    * first entry is the owner and the last 2 entries are the mask and other
    * entries respectively.  Also preserve sticky bit and toggle ACL bit on.
+   * Note that this method intentionally copies the permissions of the mask
+   * entry into the FsPermission group permissions.  This is consistent with the
+   * POSIX ACLs model, which presents the mask as the permissions of the group
+   * class.
    *
    * @param accessEntries List<AclEntry> access ACL entries
    * @param existingPerm FsPermission existing permissions
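A hedged illustration of the contract the new comment describes, using the public FsPermission constructor: owner and other bits pass through, the group slot carries the mask entry's permissions, and the sticky bit is preserved. This is an illustrative helper, not the method's actual body:

```java
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;

public class MaskAsGroupSketch {
  // Group class of the result = permissions of the ACL mask entry.
  static FsPermission fromAclParts(FsAction owner, FsAction mask,
      FsAction other, FsPermission existingPerm) {
    return new FsPermission(owner, mask, other, existingPerm.getStickyBit());
  }

  public static void main(String[] args) {
    FsPermission p = fromAclParts(FsAction.ALL, FsAction.READ_EXECUTE,
        FsAction.READ, new FsPermission((short) 01755));
    System.out.println(p); // prints rwxr-xr-T (sticky bit preserved)
  }
}
```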

+ 51 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -1296,8 +1296,56 @@
     non-zero integer.
   </description>
 </property>
+
+<property>
+  <name>nfs.server.port</name>
+  <value>2049</value>
+  <description>
+      Specify the port number used by Hadoop NFS.
+  </description>
+</property>
+
+<property>
+  <name>nfs.mountd.port</name>
+  <value>4242</value>
+  <description>
+      Specify the port number used by Hadoop mount daemon.
+  </description>
+</property>
+
+<property>
+  <name>nfs.dump.dir</name>
+  <value>/tmp/.hdfs-nfs</value>
+  <description>
+    This directory is used to temporarily save out-of-order writes before
+    writing to HDFS. For each file, the out-of-order writes are dumped after
+    they are accumulated to exceed certain threshold (e.g., 1MB) in memory.
+    One needs to make sure the directory has enough space.
+  </description>
+</property>
+
+<property>
+  <name>nfs.rtmax</name>
+  <value>1048576</value>
+  <description>This is the maximum size in bytes of a READ request
+    supported by the NFS gateway. If you change this, make sure you
+    also update the nfs mount's rsize(add rsize= # of bytes to the
+    mount directive).
+  </description>
+</property>
+
+<property>
+  <name>nfs.wtmax</name>
+  <value>1048576</value>
+  <description>This is the maximum size in bytes of a WRITE request
+    supported by the NFS gateway. If you change this, make sure you
+    also update the nfs mount's wsize(add wsize= # of bytes to the
+    mount directive).
+  </description>
+</property>
+
 <property>
-  <name>dfs.nfs.keytab.file</name>
+  <name>nfs.keytab.file</name>
   <value></value>
   <description>
     *Note*: Advanced property. Change with caution.
@@ -1307,7 +1355,7 @@
 </property>
 
 <property>
-  <name>dfs.nfs.kerberos.principal</name>
+  <name>nfs.kerberos.principal</name>
   <value></value>
   <description>
     *Note*: Advanced property. Change with caution.
@@ -1318,7 +1366,7 @@
 </property>
 
 <property>
-  <name>dfs.nfs.allow.insecure.ports</name>
+  <name>nfs.allow.insecure.ports</name>
   <value>true</value>
   <description>
     When set to false, client connections originating from unprivileged ports

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js

@@ -45,7 +45,7 @@
     return function (jqxhr, text, err) {
       switch(jqxhr.status) {
         case 401:
-          var msg = '<p>Authentication failed when trying to open ' + url + ': Unauthrozied.</p>';
+          var msg = '<p>Authentication failed when trying to open ' + url + ': Unauthorized.</p>';
           break;
         case 403:
           if(jqxhr.responseJSON !== undefined && jqxhr.responseJSON.RemoteException !== undefined) {

+ 7 - 7
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm

@@ -76,14 +76,14 @@ HDFS NFS Gateway
 
 ----
   <property>
-    <name>dfs.nfs.keytab.file</name>
+    <name>nfs.keytab.file</name>
     <value>/etc/hadoop/conf/nfsserver.keytab</value> <!-- path to the nfs gateway keytab -->
   </property>
 ----
 
 ----
   <property>
-    <name>dfs.nfs.kerberos.principal</name>
+    <name>nfs.kerberos.principal</name>
     <value>nfsserver/_HOST@YOUR-REALM.COM</value>
   </property>
 ----
@@ -121,7 +121,7 @@ HDFS NFS Gateway
 
 ----
   <property>
-    <name>dfs.nfs3.dump.dir</name>
+    <name>nfs.dump.dir</name>
     <value>/tmp/.hdfs-nfs</value>
   </property>
 ----
@@ -134,7 +134,7 @@ HDFS NFS Gateway
 
 ----
 <property>
-  <name>dfs.nfs.rtmax</name>
+  <name>nfs.rtmax</name>
   <value>1048576</value>
   <description>This is the maximum size in bytes of a READ request
     supported by the NFS gateway. If you change this, make sure you
@@ -146,7 +146,7 @@ HDFS NFS Gateway
 
 ----
 <property>
-  <name>dfs.nfs.wtmax</name>
+  <name>nfs.wtmax</name>
   <value>65536</value>
   <description>This is the maximum size in bytes of a WRITE request
     supported by the NFS gateway. If you change this, make sure you
@@ -167,7 +167,7 @@ HDFS NFS Gateway
 
 ----
 <property>
-  <name>dfs.nfs.exports.allowed.hosts</name>
+  <name>nfs.exports.allowed.hosts</name>
   <value>* rw</value>
 </property>
----
@@ -345,7 +345,7 @@ HDFS NFS Gateway
   file in the event one wishes to access the HDFS NFS Gateway from a system with
   a completely disparate set of UIDs/GIDs. By default this file is located at
   "/etc/nfs.map", but a custom location can be configured by setting the
-  "dfs.nfs.static.mapping.file" property to the path of the static mapping file.
+  "nfs.static.mapping.file" property to the path of the static mapping file.
   The format of the static mapping file is similar to what is described in the
   exports(5) manual page, but roughly it is:
 

+ 12 - 0
hadoop-yarn-project/CHANGES.txt

@@ -29,6 +29,9 @@ Release 2.5.0 - UNRELEASED
     YARN-1362. Distinguish between nodemanager shutdown for decommission vs shutdown
     for restart. (Jason Lowe via junping_du)
 
+    YARN-1338. Recover localized resource cache state upon nodemanager restart
+    (Jason Lowe via junping_du)
+
   IMPROVEMENTS
 
     YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via
@@ -117,6 +120,12 @@ Release 2.5.0 - UNRELEASED
     YARN-596. Use scheduling policies throughout the queue hierarchy to decide
     which containers to preempt (Wei Yan via Sandy Ryza)
 
+    YARN-2054. Better defaults for YARN ZK configs for retries and retry-interval
+    when HA is enabled. (kasha)
+
+    YARN-2010. Document yarn.resourcemanager.zk-auth and its scope.
+    (Robert Kanter via kasha)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -162,6 +171,9 @@ Release 2.5.0 - UNRELEASED
     that both Timeline Server and client can access them. (Zhijie Shen via
     vinodkv)
 
+    YARN-1868. YARN status web ui does not show correctly in IE 11.
+    (Chuan Liu via cnauroth)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -324,11 +324,11 @@ public class YarnConfiguration extends Configuration {
   public static final String RM_ZK_ADDRESS = RM_ZK_PREFIX + "address";
 
   public static final String RM_ZK_NUM_RETRIES = RM_ZK_PREFIX + "num-retries";
-  public static final int DEFAULT_ZK_RM_NUM_RETRIES = 500;
+  public static final int DEFAULT_ZK_RM_NUM_RETRIES = 1000;
 
   public static final String RM_ZK_RETRY_INTERVAL_MS =
       RM_ZK_PREFIX + "retry-interval-ms";
-  public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 2000;
+  public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 1000;
 
   public static final String RM_ZK_TIMEOUT_MS = RM_ZK_PREFIX + "timeout-ms";
   public static final int DEFAULT_RM_ZK_TIMEOUT_MS = 10000;

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/HtmlPage.java

@@ -79,7 +79,8 @@ public abstract class HtmlPage extends TextView {
   @Override
   public void render() {
     puts(DOCTYPE);
-    render(page().html().meta_http("Content-type", MimeType.HTML));
+    render(page().html().meta_http("X-UA-Compatible", "IE=8")
+        .meta_http("Content-type", MimeType.HTML));
     if (page().nestLevel() != 0) {
       throw new WebAppException("Error rendering page: nestLevel="+
                                 page().nestLevel());

+ 17 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -309,14 +309,17 @@
   <property>
     <description>Number of times RM tries to connect to ZooKeeper.</description>
     <name>yarn.resourcemanager.zk-num-retries</name>
-    <value>500</value>
+    <value>1000</value>
   </property>
 
   <property>
     <description>Retry interval in milliseconds when connecting to ZooKeeper.
+      When HA is enabled, the value here is NOT used. It is generated
+      automatically from yarn.resourcemanager.zk-timeout-ms and
+      yarn.resourcemanager.zk-num-retries.
     </description>
     <name>yarn.resourcemanager.zk-retry-interval-ms</name>
-    <value>2000</value>
+    <value>1000</value>
   </property>
 
   <property>
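
Note: the defaults above move from 500 retries every 2000 ms to 1000 retries
every 1000 ms. A rough sketch of the HA behavior the new description documents,
assuming the effective interval is derived by spreading the retries across the
ZK session timeout (the exact formula lives in the RM state-store code, so
treat this as an approximation):

    import org.apache.hadoop.conf.Configuration;

    public class ZkRetryIntervalSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        long timeoutMs = conf.getLong("yarn.resourcemanager.zk-timeout-ms", 10000);
        long retries = conf.getLong("yarn.resourcemanager.zk-num-retries", 1000);
        // Assumed derivation when HA is enabled:
        // 10000 ms / 1000 retries = 10 ms between attempts.
        System.out.println("derived interval = " + (timeoutMs / retries) + " ms");
      }
    }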
@@ -369,6 +372,18 @@
     <name>yarn.resourcemanager.zk-state-store.root-node.acl</name>
   </property>
 
+  <property>
+    <description>
+        Specify the auths to be used for the ACL's specified in both the
+        yarn.resourcemanager.zk-acl and
+        yarn.resourcemanager.zk-state-store.root-node.acl properties.  This
+        takes a comma-separated list of authentication mechanisms, each of the
+        form 'scheme:auth' (the same syntax used for the 'addAuth' command in
+        the ZK CLI).
+    </description>
+    <name>yarn.resourcemanager.zk-auth</name>
+  </property>
+
   <property>
     <description>URI pointing to the location of the FileSystem path where
     RM state will be stored. This must be supplied when using
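
Note: a short example of supplying such an auth; the 'digest' scheme with
user:password credentials is the usual ZooKeeper choice, and the values below
are made up for illustration:

    import org.apache.hadoop.conf.Configuration;

    public class ZkAuthSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // One 'scheme:auth' entry; several entries would be comma-separated.
        conf.set("yarn.resourcemanager.zk-auth", "digest:rmuser:rmpassword");
      }
    }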

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestSubViews.java

@@ -70,6 +70,6 @@ public class TestSubViews {
     out.flush();
     verify(out).print("sub1 text");
     verify(out).print("sub2 text");
-    verify(out, times(15)).println(); // test inline transition across views
+    verify(out, times(16)).println(); // test inline transition across views
   }
 }
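
Note: the expected println count rises from 15 to 16 presumably because the
extra meta_http("X-UA-Compatible", "IE=8") call added to HtmlPage above emits
one additional header line.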

+ 8 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/view/TestHtmlPage.java

@@ -22,11 +22,12 @@ import com.google.inject.Injector;
 
 import java.io.PrintWriter;
 
+import org.apache.hadoop.yarn.webapp.MimeType;
 import org.apache.hadoop.yarn.webapp.WebAppException;
 import org.apache.hadoop.yarn.webapp.test.WebAppTests;
 import org.apache.hadoop.yarn.webapp.view.HtmlPage;
-
 import org.junit.Test;
+
 import static org.mockito.Mockito.*;
 
 public class TestHtmlPage {
@@ -53,6 +54,12 @@ public class TestHtmlPage {
     Injector injector = WebAppTests.testPage(TestView.class);
     PrintWriter out = injector.getInstance(PrintWriter.class);
 
+    // Verify the HTML page has correct meta tags in the header
+    verify(out).print(" http-equiv=\"X-UA-Compatible\"");
+    verify(out).print(" content=\"IE=8\"");
+    verify(out).print(" http-equiv=\"Content-type\"");
+    verify(out).print(String.format(" content=\"%s\"", MimeType.HTML));
+
     verify(out).print("test");
     verify(out).print(" id=\"testid\"");
     verify(out).print("test note");

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml

@@ -156,6 +156,10 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-common</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.fusesource.leveldbjni</groupId>
+      <artifactId>leveldbjni-all</artifactId>
+    </dependency>
   </dependencies>
 
   <profiles>
@@ -292,6 +296,7 @@
               <source>
                 <directory>${basedir}/src/main/proto</directory>
                 <includes>
+                  <include>yarn_server_nodemanager_recovery.proto</include>
                   <include>yarn_server_nodemanager_service_protos.proto</include>
                   <include>LocalizationProtocol.proto</include>
                 </includes>
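
Note: the new leveldbjni-all dependency backs the leveldb-based
NMLeveldbStateStoreService used below, and the added proto include generates
the LocalizedResourceProto records that the state store persists.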

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
@@ -67,6 +68,8 @@ public interface Context {
 
   ApplicationACLsManager getApplicationACLsManager();
 
+  NMStateStoreService getNMStateStore();
+
   boolean getDecommissioned();
 
   void setDecommissioned(boolean isDecommissioned);

+ 49 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -53,6 +53,9 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManag
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMLeveldbStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.WebServer;
@@ -78,6 +81,7 @@ public class NodeManager extends CompositeService
   private ContainerManagerImpl containerManager;
   private NodeStatusUpdater nodeStatusUpdater;
   private static CompositeServiceShutdownHook nodeManagerShutdownHook; 
+  private NMStateStoreService nmStore = null;
   
   private AtomicBoolean isStopping = new AtomicBoolean(false);
   
@@ -115,9 +119,10 @@ public class NodeManager extends CompositeService
 
   protected NMContext createNMContext(
       NMContainerTokenSecretManager containerTokenSecretManager,
-      NMTokenSecretManagerInNM nmTokenSecretManager) {
+      NMTokenSecretManagerInNM nmTokenSecretManager,
+      NMStateStoreService stateStore) {
     return new NMContext(containerTokenSecretManager, nmTokenSecretManager,
-        dirsHandler, aclsManager);
+        dirsHandler, aclsManager, stateStore);
   }
 
   protected void doSecureLogin() throws IOException {
@@ -125,11 +130,8 @@ public class NodeManager extends CompositeService
         YarnConfiguration.NM_PRINCIPAL);
   }
 
-  @Override
-  protected void serviceInit(Configuration conf) throws Exception {
-
-    conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
-
+  private void initAndStartRecoveryStore(Configuration conf)
+      throws IOException {
     boolean recoveryEnabled = conf.getBoolean(
         YarnConfiguration.NM_RECOVERY_ENABLED,
         YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED);
@@ -142,7 +144,36 @@ public class NodeManager extends CompositeService
       }
       Path recoveryRoot = new Path(recoveryDirName);
       recoveryFs.mkdirs(recoveryRoot, new FsPermission((short)0700));
+      nmStore = new NMLeveldbStateStoreService();
+    } else {
+      nmStore = new NMNullStateStoreService();
+    }
+    nmStore.init(conf);
+    nmStore.start();
+  }
+
+  private void stopRecoveryStore() throws IOException {
+    nmStore.stop();
+    if (context.getDecommissioned() && nmStore.canRecover()) {
+      LOG.info("Removing state store due to decommission");
+      Configuration conf = getConfig();
+      Path recoveryRoot = new Path(
+          conf.get(YarnConfiguration.NM_RECOVERY_DIR));
+      LOG.info("Removing state store at " + recoveryRoot
+          + " due to decommission");
+      FileSystem recoveryFs = FileSystem.getLocal(conf);
+      if (!recoveryFs.delete(recoveryRoot, true)) {
+        LOG.warn("Unable to delete " + recoveryRoot);
+      }
     }
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+
+    conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
+
+    initAndStartRecoveryStore(conf);
 
     NMContainerTokenSecretManager containerTokenSecretManager =
         new NMContainerTokenSecretManager(conf);
@@ -171,7 +202,7 @@ public class NodeManager extends CompositeService
     dirsHandler = nodeHealthChecker.getDiskHandler();
 
     this.context = createNMContext(containerTokenSecretManager,
-        nmTokenSecretManager);
+        nmTokenSecretManager, nmStore);
     
     nodeStatusUpdater =
         createNodeStatusUpdater(context, dispatcher, nodeHealthChecker);
@@ -220,6 +251,7 @@ public class NodeManager extends CompositeService
       return;
     }
     super.serviceStop();
+    stopRecoveryStore();
     DefaultMetricsSystem.shutdown();
   }
 
@@ -272,11 +304,13 @@ public class NodeManager extends CompositeService
     private WebServer webServer;
     private final NodeHealthStatus nodeHealthStatus = RecordFactoryProvider
         .getRecordFactory(null).newRecordInstance(NodeHealthStatus.class);
+    private final NMStateStoreService stateStore;
     private boolean isDecommissioned = false;
 
     public NMContext(NMContainerTokenSecretManager containerTokenSecretManager,
         NMTokenSecretManagerInNM nmTokenSecretManager,
-        LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager) {
+        LocalDirsHandlerService dirsHandler, ApplicationACLsManager aclsManager,
+        NMStateStoreService stateStore) {
       this.containerTokenSecretManager = containerTokenSecretManager;
       this.nmTokenSecretManager = nmTokenSecretManager;
       this.dirsHandler = dirsHandler;
@@ -284,6 +318,7 @@ public class NodeManager extends CompositeService
       this.nodeHealthStatus.setIsNodeHealthy(true);
       this.nodeHealthStatus.setHealthReport("Healthy");
       this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
+      this.stateStore = stateStore;
     }
 
     /**
@@ -351,6 +386,11 @@ public class NodeManager extends CompositeService
       return aclsManager;
     }
 
+    @Override
+    public NMStateStoreService getNMStateStore() {
+      return stateStore;
+    }
+
     @Override
     public boolean getDecommissioned() {
       return isDecommissioned;
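
Note: a minimal sketch of turning the new recovery path on (the directory
value is illustrative; both keys appear in the hunks above):

    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class NmRecoverySketch {
      public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        // Selects NMLeveldbStateStoreService in initAndStartRecoveryStore();
        // when false, the no-op NMNullStateStoreService is used instead.
        conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
        // Local directory where the leveldb state is kept across restarts.
        conf.set(YarnConfiguration.NM_RECOVERY_DIR, "/tmp/nm-recovery");
      }
    }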

+ 12 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -22,6 +22,7 @@ import static org.apache.hadoop.service.Service.STATE.STARTED;
 
 import java.io.IOException;
 import java.net.InetSocketAddress;
+import java.net.URISyntaxException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -116,6 +117,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.Contai
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.authorize.NMPolicyProvider;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -218,6 +220,15 @@ public class ContainerManagerImpl extends CompositeService implements
         SHUTDOWN_CLEANUP_SLOP_MS;
 
     super.serviceInit(conf);
+    recover();
+  }
+
+  private void recover() throws IOException, URISyntaxException {
+    NMStateStoreService stateStore = context.getNMStateStore();
+    if (stateStore.canRecover()) {
+      rsrcLocalizationSrvc.recoverLocalizedResources(
+          stateStore.loadLocalizationState());
+    }
   }
 
   protected LogHandler createLogHandler(Configuration conf, Context context,
@@ -239,7 +250,7 @@ public class ContainerManagerImpl extends CompositeService implements
   protected ResourceLocalizationService createResourceLocalizationService(
       ContainerExecutor exec, DeletionService deletionContext) {
     return new ResourceLocalizationService(this.dispatcher, exec,
-        deletionContext, dirsHandler);
+        deletionContext, dirsHandler, context.getNMStateStore());
   }
 
   protected ContainersLauncher createContainersLauncher(Context context,
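
Note: recover() above only does work when the context carries a recoverable
store; the assumed contract, inferred from the canRecover() checks in this
patch rather than quoted from the store sources:

    // NMLeveldbStateStoreService.canRecover() -> true: localization state is
    //   reloaded via loadLocalizationState() after an NM restart.
    // NMNullStateStoreService.canRecover()    -> false: recover() is a no-op.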

+ 77 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java

@@ -26,6 +26,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * {@link LocalCacheDirectoryManager} is used for managing hierarchical
  * directories for local cache. It will allow to restrict the number of files in
@@ -99,6 +101,57 @@ public class LocalCacheDirectoryManager {
     }
   }
 
+  /**
+   * Increment the file count for a relative directory within the cache
+   * 
+   * @param relPath the relative path
+   */
+  public synchronized void incrementFileCountForPath(String relPath) {
+    relPath = relPath == null ? "" : relPath.trim();
+    Directory subDir = knownDirectories.get(relPath);
+    if (subDir == null) {
+      int dirnum = Directory.getDirectoryNumber(relPath);
+      totalSubDirectories = Math.max(dirnum, totalSubDirectories);
+      subDir = new Directory(dirnum);
+      nonFullDirectories.add(subDir);
+      knownDirectories.put(subDir.getRelativePath(), subDir);
+    }
+    if (subDir.incrementAndGetCount() >= perDirectoryFileLimit) {
+      nonFullDirectories.remove(subDir);
+    }
+  }
+
+  /**
+   * Given a path to a directory within a local cache tree return the
+   * root of the cache directory.
+   * 
+   * @param path the directory within a cache directory
+   * @return the local cache directory root or null if not found
+   */
+  public static Path getCacheDirectoryRoot(Path path) {
+    while (path != null) {
+      String name = path.getName();
+      if (name.length() != 1) {
+        return path;
+      }
+      int dirnum = DIRECTORIES_PER_LEVEL;
+      try {
+        dirnum = Integer.parseInt(name, DIRECTORIES_PER_LEVEL);
+      } catch (NumberFormatException e) {
+      }
+      if (dirnum >= DIRECTORIES_PER_LEVEL) {
+        return path;
+      }
+      path = path.getParent();
+    }
+    return path;
+  }
+
+  @VisibleForTesting
+  synchronized Directory getDirectory(String relPath) {
+    return knownDirectories.get(relPath);
+  }
+
   /*
    * It limits the number of files and sub directories in the directory to the
    * limit LocalCacheDirectoryManager#perDirectoryFileLimit.
@@ -108,11 +161,9 @@ public class LocalCacheDirectoryManager {
     private final String relativePath;
     private int fileCount;
 
-    public Directory(int directoryNo) {
-      fileCount = 0;
-      if (directoryNo == 0) {
-        relativePath = "";
-      } else {
+    static String getRelativePath(int directoryNo) {
+      String relativePath = "";
+      if (directoryNo > 0) {
         String tPath = Integer.toString(directoryNo - 1, DIRECTORIES_PER_LEVEL);
         StringBuffer sb = new StringBuffer();
         if (tPath.length() == 1) {
@@ -128,6 +179,27 @@ public class LocalCacheDirectoryManager {
         }
         relativePath = sb.toString();
       }
+      return relativePath;
+    }
+
+    static int getDirectoryNumber(String relativePath) {
+      String numStr = relativePath.replace("/", "");
+      if (relativePath.isEmpty()) {
+        return 0;
+      }
+      if (numStr.length() > 1) {
+        // undo step from getRelativePath() to reuse 0th sub directory
+        String firstChar = Integer.toString(
+            Integer.parseInt(numStr.substring(0, 1),
+                DIRECTORIES_PER_LEVEL) + 1, DIRECTORIES_PER_LEVEL);
+        numStr = firstChar + numStr.substring(1);
+      }
+      return Integer.parseInt(numStr, DIRECTORIES_PER_LEVEL) + 1;
+    }
+
+    public Directory(int directoryNo) {
+      fileCount = 0;
+      relativePath = getRelativePath(directoryNo);
     }
 
     public int incrementAndGetCount() {
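
Note: a rough worked example of the numbering round trip introduced here,
assuming DIRECTORIES_PER_LEVEL is 36 (base-36 digits 0-9a-z); the sketch
mirrors the single-character case only and is illustrative, not the class's
actual API:

    public class DirectoryNumberSketch {
      public static void main(String[] args) {
        // getRelativePath(1) encodes directoryNo - 1 = 0 in base 36 -> "0";
        // getDirectoryNumber("0") decodes it back: parseInt("0", 36) + 1 = 1.
        int directoryNo = 1;
        String relPath = Integer.toString(directoryNo - 1, 36); // "0"
        int decoded = Integer.parseInt(relPath, 36) + 1;        // 1
        System.out.println("\"" + relPath + "\" <-> " + decoded);
      }
    }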

+ 0 - 10
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java

@@ -18,15 +18,12 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
-import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent;
 
-import com.google.common.annotations.VisibleForTesting;
-
 /**
  * Component tracking resources all of the same {@link LocalResourceVisibility}
  * 
@@ -34,18 +31,11 @@ import com.google.common.annotations.VisibleForTesting;
 interface LocalResourcesTracker
     extends EventHandler<ResourceEvent>, Iterable<LocalizedResource> {
 
-  // TODO: Not used at all!!
-  boolean contains(LocalResourceRequest resource);
-
   boolean remove(LocalizedResource req, DeletionService delService);
 
   Path getPathForLocalization(LocalResourceRequest req, Path localDirPath);
 
   String getUser();
 
-  long nextUniqueNumber();
-  
-  @VisibleForTesting
-  @Private
   LocalizedResource getLocalizedResource(LocalResourceRequest request);
 }
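
Note: contains() and nextUniqueNumber() can be dropped because unique-number
assignment now happens inside getPathForLocalization() in the implementation
below, which also records the start of each localization in the state store.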

+ 156 - 29
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.Iterator;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
@@ -27,14 +28,21 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -53,6 +61,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       .compile(RANDOM_DIR_REGEX);
 
   private final String user;
+  private final ApplicationId appId;
   private final Dispatcher dispatcher;
   private final ConcurrentMap<LocalResourceRequest,LocalizedResource> localrsrc;
   private Configuration conf;
@@ -77,17 +86,22 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
    * per APPLICATION, USER and PUBLIC cache.
    */
   private AtomicLong uniqueNumberGenerator = new AtomicLong(9);
+  private NMStateStoreService stateStore;
 
-  public LocalResourcesTrackerImpl(String user, Dispatcher dispatcher,
-      boolean useLocalCacheDirectoryManager, Configuration conf) {
-    this(user, dispatcher,
+  public LocalResourcesTrackerImpl(String user, ApplicationId appId,
+      Dispatcher dispatcher, boolean useLocalCacheDirectoryManager,
+      Configuration conf, NMStateStoreService stateStore) {
+    this(user, appId, dispatcher,
       new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>(),
-      useLocalCacheDirectoryManager, conf);
+      useLocalCacheDirectoryManager, conf, stateStore);
   }
 
-  LocalResourcesTrackerImpl(String user, Dispatcher dispatcher,
+  LocalResourcesTrackerImpl(String user, ApplicationId appId,
+      Dispatcher dispatcher,
       ConcurrentMap<LocalResourceRequest,LocalizedResource> localrsrc,
-      boolean useLocalCacheDirectoryManager, Configuration conf) {
+      boolean useLocalCacheDirectoryManager, Configuration conf,
+      NMStateStoreService stateStore) {
+    this.appId = appId;
     this.user = user;
     this.dispatcher = dispatcher;
     this.localrsrc = localrsrc;
@@ -98,6 +112,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
         new ConcurrentHashMap<LocalResourceRequest, Path>();
     }
     this.conf = conf;
+    this.stateStore = stateStore;
   }
 
   /*
@@ -119,8 +134,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       if (rsrc != null && (!isResourcePresent(rsrc))) {
         LOG.info("Resource " + rsrc.getLocalPath()
             + " is missing, localizing it again");
-        localrsrc.remove(req);
-        decrementFileCountForLocalCacheDirectory(req, rsrc);
+        removeResource(req);
         rsrc = null;
       }
       if (null == rsrc) {
@@ -141,15 +155,102 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       }
       break;
     case LOCALIZATION_FAILED:
-      decrementFileCountForLocalCacheDirectory(req, null);
       /*
        * If resource localization fails then Localized resource will be
        * removed from local cache.
        */
-      localrsrc.remove(req);
+      removeResource(req);
+      break;
+    case RECOVERED:
+      if (rsrc != null) {
+        LOG.warn("Ignoring attempt to recover existing resource " + rsrc);
+        return;
+      }
+      rsrc = recoverResource(req, (ResourceRecoveredEvent) event);
+      localrsrc.put(req, rsrc);
       break;
     }
+
     rsrc.handle(event);
+
+    if (event.getType() == ResourceEventType.LOCALIZED) {
+      if (rsrc.getLocalPath() != null) {
+        try {
+          stateStore.finishResourceLocalization(user, appId,
+              buildLocalizedResourceProto(rsrc));
+        } catch (IOException ioe) {
+          LOG.error("Error storing resource state for " + rsrc, ioe);
+        }
+      } else {
+        LOG.warn("Resource " + rsrc + " localized without a location");
+      }
+    }
+  }
+
+  private LocalizedResource recoverResource(LocalResourceRequest req,
+      ResourceRecoveredEvent event) {
+    // unique number for a resource is the directory of the resource
+    Path localDir = event.getLocalPath().getParent();
+    long rsrcId = Long.parseLong(localDir.getName());
+
+    // update ID generator to avoid conflicts with existing resources
+    while (true) {
+      long currentRsrcId = uniqueNumberGenerator.get();
+      long nextRsrcId = Math.max(currentRsrcId, rsrcId);
+      if (uniqueNumberGenerator.compareAndSet(currentRsrcId, nextRsrcId)) {
+        break;
+      }
+    }
+
+    incrementFileCountForLocalCacheDirectory(localDir.getParent());
+
+    return new LocalizedResource(req, dispatcher);
+  }
+
+  private LocalizedResourceProto buildLocalizedResourceProto(
+      LocalizedResource rsrc) {
+    return LocalizedResourceProto.newBuilder()
+        .setResource(buildLocalResourceProto(rsrc.getRequest()))
+        .setLocalPath(rsrc.getLocalPath().toString())
+        .setSize(rsrc.getSize())
+        .build();
+  }
+
+  private LocalResourceProto buildLocalResourceProto(LocalResource lr) {
+    LocalResourcePBImpl lrpb;
+    if (!(lr instanceof LocalResourcePBImpl)) {
+      lr = LocalResource.newInstance(lr.getResource(), lr.getType(),
+          lr.getVisibility(), lr.getSize(), lr.getTimestamp(),
+          lr.getPattern());
+    }
+    lrpb = (LocalResourcePBImpl) lr;
+    return lrpb.getProto();
+  }
+
+  public void incrementFileCountForLocalCacheDirectory(Path cacheDir) {
+    if (useLocalCacheDirectoryManager) {
+      Path cacheRoot = LocalCacheDirectoryManager.getCacheDirectoryRoot(
+          cacheDir);
+      if (cacheRoot != null) {
+        LocalCacheDirectoryManager dir = directoryManagers.get(cacheRoot);
+        if (dir == null) {
+          dir = new LocalCacheDirectoryManager(conf);
+          LocalCacheDirectoryManager otherDir =
+              directoryManagers.putIfAbsent(cacheRoot, dir);
+          if (otherDir != null) {
+            dir = otherDir;
+          }
+        }
+        if (cacheDir.equals(cacheRoot)) {
+          dir.incrementFileCountForPath("");
+        } else {
+          String dirStr = cacheDir.toUri().getRawPath();
+          String rootStr = cacheRoot.toUri().getRawPath();
+          dir.incrementFileCountForPath(
+              dirStr.substring(rootStr.length() + 1));
+        }
+      }
+    }
   }
   }
 
   /*
@@ -216,11 +317,6 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
     return ret;
   }
   
-  @Override
-  public boolean contains(LocalResourceRequest resource) {
-    return localrsrc.containsKey(resource);
-  }
-
   @Override
   public boolean remove(LocalizedResource rem, DeletionService delService) {
  // current synchronization guaranteed by crude RLS event for cleanup
@@ -237,16 +333,31 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
           + " with non-zero refcount");
           + " with non-zero refcount");
       return false;
       return false;
     } else { // ResourceState is LOCALIZED or INIT
     } else { // ResourceState is LOCALIZED or INIT
-      localrsrc.remove(rem.getRequest());
       if (ResourceState.LOCALIZED.equals(rsrc.getState())) {
       if (ResourceState.LOCALIZED.equals(rsrc.getState())) {
         delService.delete(getUser(), getPathToDelete(rsrc.getLocalPath()));
         delService.delete(getUser(), getPathToDelete(rsrc.getLocalPath()));
       }
       }
-      decrementFileCountForLocalCacheDirectory(rem.getRequest(), rsrc);
+      removeResource(rem.getRequest());
       LOG.info("Removed " + rsrc.getLocalPath() + " from localized cache");
       LOG.info("Removed " + rsrc.getLocalPath() + " from localized cache");
       return true;
       return true;
     }
     }
   }
   }
 
 
+  private void removeResource(LocalResourceRequest req) {
+    LocalizedResource rsrc = localrsrc.remove(req);
+    decrementFileCountForLocalCacheDirectory(req, rsrc);
+    if (rsrc != null) {
+      Path localPath = rsrc.getLocalPath();
+      if (localPath != null) {
+        try {
+          stateStore.removeLocalizedResource(user, appId, localPath);
+        } catch (IOException e) {
+          LOG.error("Unable to remove resource " + rsrc + " from state store",
+              e);
+        }
+      }
+    }
+  }
+
   /**
   /**
    * Returns the path up to the random directory component.
    * Returns the path up to the random directory component.
    */
    */
@@ -285,6 +396,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
   @Override
   public Path
       getPathForLocalization(LocalResourceRequest req, Path localDirPath) {
+    Path rPath = localDirPath;
     if (useLocalCacheDirectoryManager && localDirPath != null) {
 
       if (!directoryManagers.containsKey(localDirPath)) {
@@ -293,7 +405,7 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
       }
       LocalCacheDirectoryManager dir = directoryManagers.get(localDirPath);
 
-      Path rPath = localDirPath;
+      rPath = localDirPath;
       String hierarchicalPath = dir.getRelativePathForLocalization();
       // For most of the scenarios we will get root path only which
       // is an empty string
@@ -301,21 +413,36 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
         rPath = new Path(localDirPath, hierarchicalPath);
       }
       inProgressLocalResourcesMap.put(req, rPath);
-      return rPath;
-    } else {
-      return localDirPath;
     }
-  }
 
-  @Override
-  public long nextUniqueNumber() {
-    return uniqueNumberGenerator.incrementAndGet();
+    rPath = new Path(rPath,
+        Long.toString(uniqueNumberGenerator.incrementAndGet()));
+    Path localPath = new Path(rPath, req.getPath().getName());
+    LocalizedResource rsrc = localrsrc.get(req);
+    rsrc.setLocalPath(localPath);
+    LocalResource lr = LocalResource.newInstance(req.getResource(),
+        req.getType(), req.getVisibility(), req.getSize(),
+        req.getTimestamp());
+    try {
+      stateStore.startResourceLocalization(user, appId,
+          ((LocalResourcePBImpl) lr).getProto(), localPath);
+    } catch (IOException e) {
+      LOG.error("Unable to record localization start for " + rsrc, e);
+    }
+    return rPath;
   }
 
-  @VisibleForTesting
-  @Private
   @Override
   public LocalizedResource getLocalizedResource(LocalResourceRequest request) {
     return localrsrc.get(request);
   }
-}
+
+  @VisibleForTesting
+  LocalCacheDirectoryManager getDirectoryManager(Path localDirPath) {
+    LocalCacheDirectoryManager mgr = null;
+    if (useLocalCacheDirectoryManager) {
+      mgr = directoryManagers.get(localDirPath);
+    }
+    return mgr;
+  }
+}
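
Note: taken together, the tracker now journals each resource's lifecycle in
the state store: startResourceLocalization() before the download begins,
finishResourceLocalization() on the LOCALIZED event, and
removeLocalizedResource() when a resource is dropped. This is what lets
ResourceLocalizationService below replay completed resources and delete
in-progress ones after a restart.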

+ 20 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent;
 import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
@@ -54,8 +55,8 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
 
   private static final Log LOG = LogFactory.getLog(LocalizedResource.class);
 
-  Path localPath;
-  long size = -1;
+  volatile Path localPath;
+  volatile long size = -1;
   final LocalResourceRequest rsrc;
   final Dispatcher dispatcher;
   final StateMachine<ResourceState,ResourceEventType,ResourceEvent>
@@ -76,6 +77,8 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
     // From INIT (ref == 0, awaiting req)
     .addTransition(ResourceState.INIT, ResourceState.DOWNLOADING,
         ResourceEventType.REQUEST, new FetchResourceTransition())
+    .addTransition(ResourceState.INIT, ResourceState.LOCALIZED,
+        ResourceEventType.RECOVERED, new RecoveredTransition())
 
     // From DOWNLOADING (ref > 0, may be localizing)
     .addTransition(ResourceState.DOWNLOADING, ResourceState.DOWNLOADING,
@@ -157,6 +160,10 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
     return localPath;
   }
 
+  public void setLocalPath(Path localPath) {
+    this.localPath = Path.getPathWithoutSchemeAndAuthority(localPath);
+  }
+
   public long getTimestamp() {
     return timestamp.get();
   }
@@ -234,7 +241,8 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
     @Override
     public void transition(LocalizedResource rsrc, ResourceEvent event) {
       ResourceLocalizedEvent locEvent = (ResourceLocalizedEvent) event;
-      rsrc.localPath = locEvent.getLocation();
+      rsrc.localPath =
+          Path.getPathWithoutSchemeAndAuthority(locEvent.getLocation());
       rsrc.size = locEvent.getSize();
       for (ContainerId container : rsrc.ref) {
         rsrc.dispatcher.getEventHandler().handle(
@@ -291,4 +299,13 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
       rsrc.release(relEvent.getContainer());
     }
   }
+
+  private static class RecoveredTransition extends ResourceTransition {
+    @Override
+    public void transition(LocalizedResource rsrc, ResourceEvent event) {
+      ResourceRecoveredEvent recoveredEvent = (ResourceRecoveredEvent) event;
+      rsrc.localPath = recoveredEvent.getLocalPath();
+      rsrc.size = recoveredEvent.getSize();
+    }
+  }
 }

+ 111 - 33
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java

@@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
@@ -81,6 +82,8 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService.FileDeletionTask;
@@ -109,10 +112,15 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenSecretManager;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredLocalizationState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredUserResources;
 import org.apache.hadoop.yarn.server.nodemanager.security.authorize.NMPolicyProvider;
 import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerBuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -142,6 +150,7 @@ public class ResourceLocalizationService extends CompositeService
   private RecordFactory recordFactory;
   private final ScheduledExecutorService cacheCleanup;
   private LocalizerTokenSecretManager secretManager;
+  private NMStateStoreService stateStore;
 
   private LocalResourcesTracker publicRsrc;
 
@@ -163,7 +172,7 @@ public class ResourceLocalizationService extends CompositeService
 
   public ResourceLocalizationService(Dispatcher dispatcher,
       ContainerExecutor exec, DeletionService delService,
-      LocalDirsHandlerService dirsHandler) {
+      LocalDirsHandlerService dirsHandler, NMStateStoreService stateStore) {
 
     super(ResourceLocalizationService.class.getName());
     this.exec = exec;
@@ -175,6 +184,7 @@ public class ResourceLocalizationService extends CompositeService
         new ThreadFactoryBuilder()
           .setNameFormat("ResourceLocalizationService Cache Cleanup")
           .build());
+    this.stateStore = stateStore;
   }
 
   FileContext getLocalFileContext(Configuration conf) {
@@ -203,15 +213,17 @@ public class ResourceLocalizationService extends CompositeService
   @Override
   public void serviceInit(Configuration conf) throws Exception {
     this.validateConf(conf);
-    this.publicRsrc =
-        new LocalResourcesTrackerImpl(null, dispatcher, true, conf);
+    this.publicRsrc = new LocalResourcesTrackerImpl(null, null, dispatcher,
+        true, conf, stateStore);
     this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
 
     try {
       FileContext lfs = getLocalFileContext(conf);
       lfs.setUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
 
-      cleanUpLocalDir(lfs,delService);
+      if (!stateStore.canRecover()) {
+        cleanUpLocalDir(lfs,delService);
+      }
 
       List<String> localDirs = dirsHandler.getLocalDirs();
       for (String localDir : localDirs) {
@@ -249,6 +261,74 @@ public class ResourceLocalizationService extends CompositeService
     super.serviceInit(conf);
   }
 
+  //Recover localized resources after an NM restart
+  public void recoverLocalizedResources(RecoveredLocalizationState state)
+      throws URISyntaxException {
+    LocalResourceTrackerState trackerState = state.getPublicTrackerState();
+    recoverTrackerResources(publicRsrc, trackerState);
+
+    for (Map.Entry<String, RecoveredUserResources> userEntry :
+         state.getUserResources().entrySet()) {
+      String user = userEntry.getKey();
+      RecoveredUserResources userResources = userEntry.getValue();
+      trackerState = userResources.getPrivateTrackerState();
+      if (!trackerState.isEmpty()) {
+        LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+            null, dispatcher, true, super.getConfig(), stateStore);
+        LocalResourcesTracker oldTracker = privateRsrc.putIfAbsent(user,
+            tracker);
+        if (oldTracker != null) {
+          tracker = oldTracker;
+        }
+        recoverTrackerResources(tracker, trackerState);
+      }
+
+      for (Map.Entry<ApplicationId, LocalResourceTrackerState> appEntry :
+           userResources.getAppTrackerStates().entrySet()) {
+        trackerState = appEntry.getValue();
+        if (!trackerState.isEmpty()) {
+          ApplicationId appId = appEntry.getKey();
+          String appIdStr = ConverterUtils.toString(appId);
+          LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+              appId, dispatcher, false, super.getConfig(), stateStore);
+          LocalResourcesTracker oldTracker = appRsrc.putIfAbsent(appIdStr,
+              tracker);
+          if (oldTracker != null) {
+            tracker = oldTracker;
+          }
+          recoverTrackerResources(tracker, trackerState);
+        }
+      }
+    }
+  }
+
+  private void recoverTrackerResources(LocalResourcesTracker tracker,
+      LocalResourceTrackerState state) throws URISyntaxException {
+    for (LocalizedResourceProto proto : state.getLocalizedResources()) {
+      LocalResource rsrc = new LocalResourcePBImpl(proto.getResource());
+      LocalResourceRequest req = new LocalResourceRequest(rsrc);
+      LOG.info("Recovering localized resource " + req + " at "
+          + proto.getLocalPath());
+      tracker.handle(new ResourceRecoveredEvent(req,
+          new Path(proto.getLocalPath()), proto.getSize()));
+    }
+
+    for (Map.Entry<LocalResourceProto, Path> entry :
+         state.getInProgressResources().entrySet()) {
+      LocalResource rsrc = new LocalResourcePBImpl(entry.getKey());
+      LocalResourceRequest req = new LocalResourceRequest(rsrc);
+      Path localPath = entry.getValue();
+      tracker.handle(new ResourceRecoveredEvent(req, localPath, 0));
+
+      // delete any in-progress localizations, containers will request again
+      LOG.info("Deleting in-progress localization for " + req + " at "
+          + localPath);
+      tracker.remove(tracker.getLocalizedResource(req), delService);
+    }
+
+    // TODO: remove untracked directories in local filesystem
+  }
+
   @Override
   public LocalizerHeartbeatResponse heartbeat(LocalizerStatus status) {
     return localizerTracker.processHeartbeat(status);
@@ -337,17 +417,10 @@ public class ResourceLocalizationService extends CompositeService
     // 0) Create application tracking structs
     String userName = app.getUser();
     privateRsrc.putIfAbsent(userName, new LocalResourcesTrackerImpl(userName,
-      dispatcher, true, super.getConfig()));
-    if (null != appRsrc.putIfAbsent(
-      ConverterUtils.toString(app.getAppId()),
-      new LocalResourcesTrackerImpl(app.getUser(), dispatcher, false, super
-        .getConfig()))) {
-      LOG.warn("Initializing application " + app + " already present");
-      assert false; // TODO: FIXME assert doesn't help
-                    // ^ The condition is benign. Tests should fail and it
-                    // should appear in logs, but it's an internal error
-                    // that should have no effect on applications
-    }
+        null, dispatcher, true, super.getConfig(), stateStore));
+    String appIdStr = ConverterUtils.toString(app.getAppId());
+    appRsrc.putIfAbsent(appIdStr, new LocalResourcesTrackerImpl(app.getUser(),
+        app.getAppId(), dispatcher, false, super.getConfig(), stateStore));
     // 1) Signal container init
     //
     // This is handled by the ApplicationImpl state machine and allows
@@ -446,18 +519,28 @@ public class ResourceLocalizationService extends CompositeService
 
   @SuppressWarnings({"unchecked"})
   private void handleDestroyApplicationResources(Application application) {
-    String userName;
-    String appIDStr;
+    String userName = application.getUser();
+    ApplicationId appId = application.getAppId();
+    String appIDStr = application.toString();
     LocalResourcesTracker appLocalRsrcsTracker =
-      appRsrc.remove(ConverterUtils.toString(application.getAppId()));
-    if (null == appLocalRsrcsTracker) {
+      appRsrc.remove(ConverterUtils.toString(appId));
+    if (appLocalRsrcsTracker != null) {
+      for (LocalizedResource rsrc : appLocalRsrcsTracker ) {
+        Path localPath = rsrc.getLocalPath();
+        if (localPath != null) {
+          try {
+            stateStore.removeLocalizedResource(userName, appId, localPath);
+          } catch (IOException e) {
+            LOG.error("Unable to remove resource " + rsrc + " for " + appIDStr
+                + " from state store", e);
+          }
+        }
+      }
+    } else {
       LOG.warn("Removing uninitialized application " + application);
     }
-    // TODO: What to do with appLocalRsrcsTracker?
 
     // Delete the application directories
-    userName = application.getUser();
-    appIDStr = application.toString();
     for (String localDir : dirsHandler.getLocalDirs()) {
 
       // Delete the user-owned app-dir
@@ -668,19 +751,15 @@ public class ResourceLocalizationService extends CompositeService
         if (rsrc.getState().equals(ResourceState.DOWNLOADING)) {
           LocalResource resource = request.getResource().getRequest();
           try {
-            Path publicDirDestPath =
+            Path publicRootPath =
                 dirsHandler.getLocalPathForWrite("." + Path.SEPARATOR
                     + ContainerLocalizer.FILECACHE,
                   ContainerLocalizer.getEstimatedSize(resource), true);
-            Path hierarchicalPath =
-                publicRsrc.getPathForLocalization(key, publicDirDestPath);
-            if (!hierarchicalPath.equals(publicDirDestPath)) {
-              publicDirDestPath = hierarchicalPath;
+            Path publicDirDestPath =
+                publicRsrc.getPathForLocalization(key, publicRootPath);
+            if (!publicDirDestPath.getParent().equals(publicRootPath)) {
              DiskChecker.checkDir(new File(publicDirDestPath.toUri().getPath()));
             }
-            publicDirDestPath =
-                new Path(publicDirDestPath, Long.toString(publicRsrc
-                  .nextUniqueNumber()));
             // explicitly synchronize pending here to avoid future task
             // completing and being dequeued before pending updated
             synchronized (pending) {
@@ -968,9 +1047,8 @@ public class ResourceLocalizationService extends CompositeService
       Path dirPath =
           dirsHandler.getLocalPathForWrite(cacheDirectory,
             ContainerLocalizer.getEstimatedSize(rsrc), false);
-      dirPath = tracker.getPathForLocalization(new LocalResourceRequest(rsrc),
-        dirPath);
-      return new Path (dirPath, Long.toString(tracker.nextUniqueNumber()));
+      return tracker.getPathForLocalization(new LocalResourceRequest(rsrc),
+          dirPath);
     }
 
     @Override
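
Taken together, the changes above give the NM restart path a simple shape: skip the local-dir cleanup when the store can recover, load the saved state, and replay it into the trackers. A minimal sketch, assuming an already-wired service (the dispatcher, stateStore, and trackers from this class) and using only the APIs shown in this patch:

    // Sketch: replaying localization state after a NodeManager restart.
    // 'stateStore' and 'localizationService' are assumed to be initialized.
    if (stateStore.canRecover()) {
      RecoveredLocalizationState state = stateStore.loadLocalizationState();
      localizationService.recoverLocalizedResources(state);
    }

Completed resources are replayed as ResourceRecoveredEvents; in-progress localizations are recovered and then immediately removed, so containers will simply request them again.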

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java

@@ -31,5 +31,7 @@ public enum ResourceEventType {
   /** See {@link ResourceReleaseEvent} */
   RELEASE,
   /** See {@link ResourceFailedLocalizationEvent} */
-  LOCALIZATION_FAILED
+  LOCALIZATION_FAILED,
+  /** See {@link ResourceRecoveredEvent} */
+  RECOVERED
 }

+ 43 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceRecoveredEvent.java

@@ -0,0 +1,43 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest;
+
+public class ResourceRecoveredEvent extends ResourceEvent {
+
+  private final Path localPath;
+  private final long size;
+
+  public ResourceRecoveredEvent(LocalResourceRequest rsrc, Path localPath,
+      long size) {
+    super(rsrc, ResourceEventType.RECOVERED);
+    this.localPath = localPath;
+    this.size = size;
+  }
+
+  public Path getLocalPath() {
+    return localPath;
+  }
+
+  public long getSize() {
+    return size;
+  }
+}
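
Note the size argument: completed resources are recovered with their stored size, while in-progress resources are replayed with size 0 and then removed (see recoverTrackerResources above). A hypothetical construction, where 'req' and 'tracker' are assumed to exist:

    // Sketch: a recovered, fully localized 1 KB resource (values hypothetical).
    ResourceRecoveredEvent ev = new ResourceRecoveredEvent(req,
        new Path("/local/filecache/10/job.jar"), 1024L);
    tracker.handle(ev);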

+ 377 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java

@@ -0,0 +1,377 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import static org.fusesource.leveldbjni.JniDBFactory.asString;
+import static org.fusesource.leveldbjni.JniDBFactory.bytes;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+import org.apache.hadoop.yarn.server.utils.LeveldbIterator;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.fusesource.leveldbjni.internal.NativeDB;
+import org.iq80.leveldb.DB;
+import org.iq80.leveldb.DBException;
+import org.iq80.leveldb.Logger;
+import org.iq80.leveldb.Options;
+import org.iq80.leveldb.WriteBatch;
+
+public class NMLeveldbStateStoreService extends NMStateStoreService {
+
+  public static final Log LOG =
+      LogFactory.getLog(NMLeveldbStateStoreService.class);
+
+  private static final String DB_NAME = "yarn-nm-state";
+  private static final String DB_SCHEMA_VERSION_KEY = "schema-version";
+  private static final String DB_SCHEMA_VERSION = "1.0";
+
+  private static final String LOCALIZATION_KEY_PREFIX = "Localization/";
+  private static final String LOCALIZATION_PUBLIC_KEY_PREFIX =
+      LOCALIZATION_KEY_PREFIX + "public/";
+  private static final String LOCALIZATION_PRIVATE_KEY_PREFIX =
+      LOCALIZATION_KEY_PREFIX + "private/";
+  private static final String LOCALIZATION_STARTED_SUFFIX = "started/";
+  private static final String LOCALIZATION_COMPLETED_SUFFIX = "completed/";
+  private static final String LOCALIZATION_FILECACHE_SUFFIX = "filecache/";
+  private static final String LOCALIZATION_APPCACHE_SUFFIX = "appcache/";
+
+  private DB db;
+
+  public NMLeveldbStateStoreService() {
+    super(NMLeveldbStateStoreService.class.getName());
+  }
+
+  @Override
+  protected void startStorage() throws IOException {
+  }
+
+  @Override
+  protected void closeStorage() throws IOException {
+    if (db != null) {
+      db.close();
+    }
+  }
+
+
+  @Override
+  public RecoveredLocalizationState loadLocalizationState()
+      throws IOException {
+    RecoveredLocalizationState state = new RecoveredLocalizationState();
+
+    try {
+      LeveldbIterator iter = new LeveldbIterator(db);
+      iter.seek(bytes(LOCALIZATION_PUBLIC_KEY_PREFIX));
+      state.publicTrackerState = loadResourceTrackerState(iter,
+          LOCALIZATION_PUBLIC_KEY_PREFIX);
+
+      iter.seek(bytes(LOCALIZATION_PRIVATE_KEY_PREFIX));
+      while (iter.hasNext()) {
+        Entry<byte[],byte[]> entry = iter.peekNext();
+        String key = asString(entry.getKey());
+        if (!key.startsWith(LOCALIZATION_PRIVATE_KEY_PREFIX)) {
+          break;
+        }
+
+        int userEndPos = key.indexOf('/',
+            LOCALIZATION_PRIVATE_KEY_PREFIX.length());
+        if (userEndPos < 0) {
+          throw new IOException("Unable to determine user in resource key: "
+              + key);
+        }
+        String user = key.substring(
+            LOCALIZATION_PRIVATE_KEY_PREFIX.length(), userEndPos);
+        state.userResources.put(user, loadUserLocalizedResources(iter,
+            key.substring(0, userEndPos+1)));
+      }
+    } catch (DBException e) {
+      throw new IOException(e.getMessage(), e);
+    }
+
+    return state;
+  }
+
+  private LocalResourceTrackerState loadResourceTrackerState(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    final String completedPrefix = keyPrefix + LOCALIZATION_COMPLETED_SUFFIX;
+    final String startedPrefix = keyPrefix + LOCALIZATION_STARTED_SUFFIX;
+    LocalResourceTrackerState state = new LocalResourceTrackerState();
+    while (iter.hasNext()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      if (key.startsWith(completedPrefix)) {
+        state.localizedResources = loadCompletedResources(iter,
+            completedPrefix);
+      } else if (key.startsWith(startedPrefix)) {
+        state.inProgressResources = loadStartedResources(iter, startedPrefix);
+      } else {
+        throw new IOException("Unexpected key in resource tracker state: "
+            + key);
+      }
+    }
+
+    return state;
+  }
+
+  private List<LocalizedResourceProto> loadCompletedResources(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    List<LocalizedResourceProto> rsrcs =
+        new ArrayList<LocalizedResourceProto>();
+    while (iter.hasNext()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Loading completed resource from " + key);
+      }
+      rsrcs.add(LocalizedResourceProto.parseFrom(entry.getValue()));
+      iter.next();
+    }
+
+    return rsrcs;
+  }
+
+  private Map<LocalResourceProto, Path> loadStartedResources(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    Map<LocalResourceProto, Path> rsrcs =
+        new HashMap<LocalResourceProto, Path>();
+    while (iter.hasNext()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      Path localPath = new Path(key.substring(keyPrefix.length()));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Loading in-progress resource at " + localPath);
+      }
+      rsrcs.put(LocalResourceProto.parseFrom(entry.getValue()), localPath);
+      iter.next();
+    }
+
+    return rsrcs;
+  }
+
+  private RecoveredUserResources loadUserLocalizedResources(
+      LeveldbIterator iter, String keyPrefix) throws IOException {
+    RecoveredUserResources userResources = new RecoveredUserResources();
+    while (iter.hasNext()) {
+      Entry<byte[],byte[]> entry = iter.peekNext();
+      String key = asString(entry.getKey());
+      if (!key.startsWith(keyPrefix)) {
+        break;
+      }
+
+      if (key.startsWith(LOCALIZATION_FILECACHE_SUFFIX, keyPrefix.length())) {
+        userResources.privateTrackerState = loadResourceTrackerState(iter,
+            keyPrefix + LOCALIZATION_FILECACHE_SUFFIX);
+      } else if (key.startsWith(LOCALIZATION_APPCACHE_SUFFIX,
+          keyPrefix.length())) {
+        int appIdStartPos = keyPrefix.length() +
+            LOCALIZATION_APPCACHE_SUFFIX.length();
+        int appIdEndPos = key.indexOf('/', appIdStartPos);
+        if (appIdEndPos < 0) {
+          throw new IOException("Unable to determine appID in resource key: "
+              + key);
+        }
+        ApplicationId appId = ConverterUtils.toApplicationId(
+            key.substring(appIdStartPos, appIdEndPos));
+        userResources.appTrackerStates.put(appId,
+            loadResourceTrackerState(iter, key.substring(0, appIdEndPos+1)));
+      } else {
+        throw new IOException("Unexpected user resource key " + key);
+      }
+    }
+    return userResources;
+  }
+
+  @Override
+  public void startResourceLocalization(String user, ApplicationId appId,
+      LocalResourceProto proto, Path localPath) throws IOException {
+    String key = getResourceStartedKey(user, appId, localPath.toString());
+    try {
+      db.put(bytes(key), proto.toByteArray());
+    } catch (DBException e) {
+      throw new IOException(e.getMessage(), e);
+    }
+  }
+
+  @Override
+  public void finishResourceLocalization(String user, ApplicationId appId,
+      LocalizedResourceProto proto) throws IOException {
+    String localPath = proto.getLocalPath();
+    String startedKey = getResourceStartedKey(user, appId, localPath);
+    String completedKey = getResourceCompletedKey(user, appId, localPath);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Storing localized resource to " + completedKey);
+    }
+    try {
+      WriteBatch batch = db.createWriteBatch();
+      try {
+        batch.delete(bytes(startedKey));
+        batch.put(bytes(completedKey), proto.toByteArray());
+        db.write(batch);
+      } finally {
+        batch.close();
+      }
+    } catch (DBException e) {
+      throw new IOException(e.getMessage(), e);
+    }
+  }
+
+  @Override
+  public void removeLocalizedResource(String user, ApplicationId appId,
+      Path localPath) throws IOException {
+    String localPathStr = localPath.toString();
+    String startedKey = getResourceStartedKey(user, appId, localPathStr);
+    String completedKey = getResourceCompletedKey(user, appId, localPathStr);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Removing local resource at " + localPathStr);
+    }
+    try {
+      WriteBatch batch = db.createWriteBatch();
+      try {
+        batch.delete(bytes(startedKey));
+        batch.delete(bytes(completedKey));
+        db.write(batch);
+      } finally {
+        batch.close();
+      }
+    } catch (DBException e) {
+      throw new IOException(e.getMessage(), e);
+    }
+  }
+
+  private String getResourceStartedKey(String user, ApplicationId appId,
+      String localPath) {
+    return getResourceTrackerKeyPrefix(user, appId)
+        + LOCALIZATION_STARTED_SUFFIX + localPath;
+  }
+
+  private String getResourceCompletedKey(String user, ApplicationId appId,
+      String localPath) {
+    return getResourceTrackerKeyPrefix(user, appId)
+        + LOCALIZATION_COMPLETED_SUFFIX + localPath;
+  }
+
+  private String getResourceTrackerKeyPrefix(String user,
+      ApplicationId appId) {
+    if (user == null) {
+      return LOCALIZATION_PUBLIC_KEY_PREFIX;
+    }
+    if (appId == null) {
+      return LOCALIZATION_PRIVATE_KEY_PREFIX + user + "/"
+          + LOCALIZATION_FILECACHE_SUFFIX;
+    }
+    return LOCALIZATION_PRIVATE_KEY_PREFIX + user + "/"
+        + LOCALIZATION_APPCACHE_SUFFIX + appId + "/";
+  }
+
+
+  @Override
+  protected void initStorage(Configuration conf)
+      throws IOException {
+    Path storeRoot = createStorageDir(conf);
+    Options options = new Options();
+    options.createIfMissing(false);
+    options.logger(new LeveldbLogger());
+    LOG.info("Using state database at " + storeRoot + " for recovery");
+    File dbfile = new File(storeRoot.toString());
+    byte[] schemaVersionData = null;
+    try {
+      db = JniDBFactory.factory.open(dbfile, options);
+      try {
+        schemaVersionData = db.get(bytes(DB_SCHEMA_VERSION_KEY));
+      } catch (DBException e) {
+        throw new IOException(e.getMessage(), e);
+      }
+    } catch (NativeDB.DBException e) {
+      if (e.isNotFound() || e.getMessage().contains(" does not exist ")) {
+        LOG.info("Creating state database at " + dbfile);
+        options.createIfMissing(true);
+        try {
+          db = JniDBFactory.factory.open(dbfile, options);
+          schemaVersionData = bytes(DB_SCHEMA_VERSION);
+          db.put(bytes(DB_SCHEMA_VERSION_KEY), schemaVersionData);
+        } catch (DBException dbErr) {
+          throw new IOException(dbErr.getMessage(), dbErr);
+        }
+      } else {
+        throw e;
+      }
+    }
+    if (schemaVersionData != null) {
+      String schemaVersion = asString(schemaVersionData);
+      // only support exact schema matches for now
+      if (!DB_SCHEMA_VERSION.equals(schemaVersion)) {
+        throw new IOException("Incompatible state database schema, found "
+            + schemaVersion + " expected " + DB_SCHEMA_VERSION);
+      }
+    } else {
+      throw new IOException("State database schema version not found");
+    }
+  }
+
+  private Path createStorageDir(Configuration conf) throws IOException {
+    final String storeUri = conf.get(YarnConfiguration.NM_RECOVERY_DIR);
+    if (storeUri == null) {
+      throw new IOException("No store location directory configured in " +
+          YarnConfiguration.NM_RECOVERY_DIR);
+    }
+
+    Path root = new Path(storeUri, DB_NAME);
+    FileSystem fs = FileSystem.getLocal(conf);
+    fs.mkdirs(root, new FsPermission((short)0700));
+    return root;
+  }
+
+
+  private static class LeveldbLogger implements Logger {
+    private static final Log LOG = LogFactory.getLog(LeveldbLogger.class);
+
+    @Override
+    public void log(String message) {
+      LOG.info(message);
+    }
+  }
+}
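
For reference, the key helpers above lay out the database as follows (the user "alice", the appId, and <localPath> are illustrative placeholders):

    Localization/public/completed/<localPath>
    Localization/public/started/<localPath>
    Localization/private/alice/filecache/completed/<localPath>
    Localization/private/alice/appcache/application_1400000000000_0001/started/<localPath>

Because leveldb iterates keys in sorted order and each tracker's "started" and "completed" entries share a common prefix, loadLocalizationState() can recover every tracker with one seek and a single forward scan.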

+ 74 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+
+// The state store to use when state isn't being stored
+public class NMNullStateStoreService extends NMStateStoreService {
+
+  public NMNullStateStoreService() {
+    super(NMNullStateStoreService.class.getName());
+  }
+
+  @Override
+  public boolean canRecover() {
+    return false;
+  }
+
+  @Override
+  public RecoveredLocalizationState loadLocalizationState()
+      throws IOException {
+    throw new UnsupportedOperationException(
+        "Recovery not supported by this state store");
+  }
+
+  @Override
+  public void startResourceLocalization(String user, ApplicationId appId,
+      LocalResourceProto proto, Path localPath) throws IOException {
+  }
+
+  @Override
+  public void finishResourceLocalization(String user, ApplicationId appId,
+      LocalizedResourceProto proto) throws IOException {
+  }
+
+  @Override
+  public void removeLocalizedResource(String user, ApplicationId appId,
+      Path localPath) throws IOException {
+  }
+
+  @Override
+  protected void initStorage(Configuration conf) throws IOException {
+  }
+
+  @Override
+  protected void startStorage() throws IOException {
+  }
+
+  @Override
+  protected void closeStorage() throws IOException {
+  }
+}
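
This null object lets the rest of the NM call the state-store API unconditionally; only canRecover() needs checking. Selecting a store might look like the following sketch (the actual NodeManager wiring is not part of this change set):

    // Sketch: choose a state store based on the recovery setting (assumed wiring).
    NMStateStoreService stateStore =
        conf.getBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, false)
            ? new NMLeveldbStateStoreService()
            : new NMNullStateStoreService();
    stateStore.init(conf);   // runs initStorage()
    stateStore.start();      // runs startStorage()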

+ 163 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java

@@ -0,0 +1,163 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+
+@Private
+@Unstable
+public abstract class NMStateStoreService extends AbstractService {
+
+  public NMStateStoreService(String name) {
+    super(name);
+  }
+
+  public static class LocalResourceTrackerState {
+    List<LocalizedResourceProto> localizedResources =
+        new ArrayList<LocalizedResourceProto>();
+    Map<LocalResourceProto, Path> inProgressResources =
+        new HashMap<LocalResourceProto, Path>();
+
+    public List<LocalizedResourceProto> getLocalizedResources() {
+      return localizedResources;
+    }
+
+    public Map<LocalResourceProto, Path> getInProgressResources() {
+      return inProgressResources;
+    }
+
+    public boolean isEmpty() {
+      return localizedResources.isEmpty() && inProgressResources.isEmpty();
+    }
+  }
+
+  public static class RecoveredUserResources {
+    LocalResourceTrackerState privateTrackerState =
+        new LocalResourceTrackerState();
+    Map<ApplicationId, LocalResourceTrackerState> appTrackerStates =
+        new HashMap<ApplicationId, LocalResourceTrackerState>();
+
+    public LocalResourceTrackerState getPrivateTrackerState() {
+      return privateTrackerState;
+    }
+
+    public Map<ApplicationId, LocalResourceTrackerState>
+    getAppTrackerStates() {
+      return appTrackerStates;
+    }
+  }
+
+  public static class RecoveredLocalizationState {
+    LocalResourceTrackerState publicTrackerState =
+        new LocalResourceTrackerState();
+    Map<String, RecoveredUserResources> userResources =
+        new HashMap<String, RecoveredUserResources>();
+
+    public LocalResourceTrackerState getPublicTrackerState() {
+      return publicTrackerState;
+    }
+
+    public Map<String, RecoveredUserResources> getUserResources() {
+      return userResources;
+    }
+  }
+
+  /** Initialize the state storage */
+  @Override
+  public void serviceInit(Configuration conf) throws IOException {
+    initStorage(conf);
+  }
+
+  /** Start the state storage for use */
+  @Override
+  public void serviceStart() throws IOException {
+    startStorage();
+  }
+
+  /** Shutdown the state storage. */
+  @Override
+  public void serviceStop() throws IOException {
+    closeStorage();
+  }
+
+  public boolean canRecover() {
+    return true;
+  }
+
+
+  /**
+   * Load the state of localized resources
+   * @return recovered localized resource state
+   * @throws IOException
+   */
+  public abstract RecoveredLocalizationState loadLocalizationState()
+      throws IOException;
+
+  /**
+   * Record the start of localization for a resource
+   * @param user the username or null if the resource is public
+   * @param appId the application ID if the resource is app-specific or null
+   * @param proto the resource request
+   * @param localPath local filesystem path where the resource will be stored
+   * @throws IOException
+   */
+  public abstract void startResourceLocalization(String user,
+      ApplicationId appId, LocalResourceProto proto, Path localPath)
+          throws IOException;
+
+  /**
+   * Record the completion of a resource localization
+   * @param user the username or null if the resource is public
+   * @param appId the application ID if the resource is app-specific or null
+   * @param proto the serialized localized resource
+   * @throws IOException
+   */
+  public abstract void finishResourceLocalization(String user,
+      ApplicationId appId, LocalizedResourceProto proto) throws IOException;
+
+  /**
+   * Remove records related to a resource localization
+   * @param user the username or null if the resource is public
+   * @param appId the application ID if the resource is app-specific or null
+   * @param localPath local filesystem path where the resource will be stored
+   * @throws IOException
+   */
+  public abstract void removeLocalizedResource(String user,
+      ApplicationId appId, Path localPath) throws IOException;
+
+
+  protected abstract void initStorage(Configuration conf) throws IOException;
+
+  protected abstract void startStorage() throws IOException;
+
+  protected abstract void closeStorage() throws IOException;
+}
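
The three abstract record methods bracket a resource's lifetime; a condensed sketch of the calls one localization makes (names and values hypothetical):

    // Sketch: state-store traffic for one private resource of user "alice".
    store.startResourceLocalization("alice", null, resourceProto, localPath);
    // ... download completes successfully ...
    store.finishResourceLocalization("alice", null, localizedProto);
    // ... later, when the resource is evicted or its app is removed ...
    store.removeLocalizedResource("alice", null, localPath);

Passing null for the user marks a public resource, and null for the appId marks a user-private (filecache) resource, as documented in the javadoc above.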

+ 31 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_recovery.proto

@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.hadoop.yarn.proto";
+option java_outer_classname = "YarnServerNodemanagerRecoveryProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+package hadoop.yarn;
+
+import "yarn_protos.proto";
+
+message LocalizedResourceProto {
+  optional LocalResourceProto resource = 1;
+  optional string localPath = 2;
+  optional int64 size = 3;
+}
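
Since this is a standard protobuf message, the generated Java API can build entries directly; for example (field values hypothetical):

    // Sketch: building a completed-resource record with the generated builder.
    LocalizedResourceProto proto = LocalizedResourceProto.newBuilder()
        .setResource(localResourceProto)      // the original LocalResourceProto request
        .setLocalPath("/local/filecache/10/job.jar")
        .setSize(1024L)
        .build();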

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 
 public class DummyContainerManager extends ContainerManagerImpl {
@@ -75,7 +76,7 @@ public class DummyContainerManager extends ContainerManagerImpl {
   protected ResourceLocalizationService createResourceLocalizationService(
       ContainerExecutor exec, DeletionService deletionContext) {
     return new ResourceLocalizationService(super.dispatcher, exec,
-        deletionContext, super.dirsHandler) {
+        deletionContext, super.dirsHandler, new NMNullStateStoreService()) {
       @Override
       public void handle(LocalizationEvent event) {
         switch (event.getType()) {

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java

@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
@@ -79,7 +80,8 @@ public class TestEventFlow {
     YarnConfiguration conf = new YarnConfiguration();
 
     Context context = new NMContext(new NMContainerTokenSecretManager(conf),
-        new NMTokenSecretManagerInNM(), null, null) {
+        new NMTokenSecretManagerInNM(), null, null,
+        new NMNullStateStoreService()) {
       @Override
       public int getHttpPort() {
         return 1234;

+ 30 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java

@@ -108,6 +108,36 @@ public class TestNodeManagerShutdown {
     localFS.delete(new Path(basedir.getPath()), true);
   }
 
+  @Test
+  public void testStateStoreRemovalOnDecommission() throws IOException {
+    final File recoveryDir = new File(basedir, "nm-recovery");
+    nm = new TestNodeManager();
+    YarnConfiguration conf = createNMConfig();
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf.set(YarnConfiguration.NM_RECOVERY_DIR, recoveryDir.getAbsolutePath());
+
+    // verify state store is not removed on normal shutdown
+    nm.init(conf);
+    nm.start();
+    Assert.assertTrue(recoveryDir.exists());
+    Assert.assertTrue(recoveryDir.isDirectory());
+    nm.stop();
+    nm = null;
+    Assert.assertTrue(recoveryDir.exists());
+    Assert.assertTrue(recoveryDir.isDirectory());
+
+    // verify state store is removed on decommissioned shutdown
+    nm = new TestNodeManager();
+    nm.init(conf);
+    nm.start();
+    Assert.assertTrue(recoveryDir.exists());
+    Assert.assertTrue(recoveryDir.isDirectory());
+    nm.getNMContext().setDecommissioned(true);
+    nm.stop();
+    nm = null;
+    Assert.assertFalse(recoveryDir.exists());
+  }
+
   @Test
   public void testKillContainersOnShutdown() throws IOException,
       YarnException {

+ 6 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java

@@ -91,6 +91,8 @@ import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 
 @SuppressWarnings("rawtypes")
 public class TestNodeStatusUpdater {
@@ -1159,7 +1161,8 @@ public class TestNodeStatusUpdater {
       @Override
       protected NMContext createNMContext(
           NMContainerTokenSecretManager containerTokenSecretManager,
-          NMTokenSecretManagerInNM nmTokenSecretManager) {
+          NMTokenSecretManagerInNM nmTokenSecretManager,
+          NMStateStoreService store) {
         return new MyNMContext(containerTokenSecretManager,
           nmTokenSecretManager);
       }
@@ -1268,7 +1271,8 @@ public class TestNodeStatusUpdater {
     public MyNMContext(
         NMContainerTokenSecretManager containerTokenSecretManager,
         NMTokenSecretManagerInNM nmTokenSecretManager) {
-      super(containerTokenSecretManager, nmTokenSecretManager, null, null);
+      super(containerTokenSecretManager, nmTokenSecretManager, null, null,
+          new NMNullStateStoreService());
     }
 
     @Override

+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java

@@ -64,6 +64,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
@@ -103,7 +104,8 @@ public abstract class BaseContainerManagerTest {
   protected static final int HTTP_PORT = 5412;
   protected Configuration conf = new YarnConfiguration();
   protected Context context = new NMContext(new NMContainerTokenSecretManager(
-    conf), new NMTokenSecretManagerInNM(), null, new ApplicationACLsManager(conf)) {
+    conf), new NMTokenSecretManagerInNM(), null,
+    new ApplicationACLsManager(conf), new NMNullStateStoreService()) {
     public int getHttpPort() {
       return HTTP_PORT;
     };

+ 47 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java

@@ -23,6 +23,7 @@ import org.junit.Assert;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalCacheDirectoryManager.Directory;
 import org.junit.Test;
 import org.junit.Test;
 
 
 public class TestLocalCacheDirectoryManager {
 public class TestLocalCacheDirectoryManager {
@@ -73,7 +74,7 @@ public class TestLocalCacheDirectoryManager {
     conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "1");
     conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "1");
     Exception e = null;
     Exception e = null;
     ResourceLocalizationService service =
     ResourceLocalizationService service =
-        new ResourceLocalizationService(null, null, null, null);
+        new ResourceLocalizationService(null, null, null, null, null);
     try {
     try {
       service.init(conf);
       service.init(conf);
     } catch (Exception e1) {
     } catch (Exception e1) {
@@ -109,4 +110,49 @@ public class TestLocalCacheDirectoryManager {
     // first sub directory
     // first sub directory
     Assert.assertEquals(firstSubDir, dir.getRelativePathForLocalization());
     Assert.assertEquals(firstSubDir, dir.getRelativePathForLocalization());
   }
   }
+
+  @Test
+  public void testDirectoryConversion() {
+    for (int i = 0; i < 10000; ++i) {
+      String path = Directory.getRelativePath(i);
+      Assert.assertEquals("Incorrect conversion for " + i, i,
+          Directory.getDirectoryNumber(path));
+    }
+  }
+
+  @Test
+  public void testIncrementFileCountForPath() {
+    YarnConfiguration conf = new YarnConfiguration();
+    conf.setInt(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY,
+        LocalCacheDirectoryManager.DIRECTORIES_PER_LEVEL + 2);
+    LocalCacheDirectoryManager mgr = new LocalCacheDirectoryManager(conf);
+    final String rootPath = "";
+    mgr.incrementFileCountForPath(rootPath);
+    Assert.assertEquals(rootPath, mgr.getRelativePathForLocalization());
+    Assert.assertFalse("root dir should be full",
+        rootPath.equals(mgr.getRelativePathForLocalization()));
+    // finish filling the other directory
+    mgr.getRelativePathForLocalization();
+    // free up space in the root dir
+    mgr.decrementFileCountForPath(rootPath);
+    mgr.decrementFileCountForPath(rootPath);
+    Assert.assertEquals(rootPath, mgr.getRelativePathForLocalization());
+    Assert.assertEquals(rootPath, mgr.getRelativePathForLocalization());
+    String otherDir = mgr.getRelativePathForLocalization();
+    Assert.assertFalse("root dir should be full", otherDir.equals(rootPath));
+
+    final String deepDir0 = "d/e/e/p/0";
+    final String deepDir1 = "d/e/e/p/1";
+    final String deepDir2 = "d/e/e/p/2";
+    final String deepDir3 = "d/e/e/p/3";
+    mgr.incrementFileCountForPath(deepDir0);
+    Assert.assertEquals(otherDir, mgr.getRelativePathForLocalization());
+    Assert.assertEquals(deepDir0, mgr.getRelativePathForLocalization());
+    Assert.assertEquals("total dir count incorrect after increment",
+        deepDir1, mgr.getRelativePathForLocalization());
+    mgr.incrementFileCountForPath(deepDir2);
+    mgr.incrementFileCountForPath(deepDir1);
+    mgr.incrementFileCountForPath(deepDir2);
+    Assert.assertEquals(deepDir3, mgr.getRelativePathForLocalization());
+  }
 }

+ 310 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;
 
 import static org.mockito.Mockito.any;
 import static org.mockito.Matchers.isA;
+import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -34,13 +35,17 @@ import org.junit.Assert;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
@@ -52,10 +57,14 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRecoveredEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.junit.Test;
+import org.mockito.ArgumentCaptor;
 
 public class TestLocalResourcesTrackerImpl {
 
@@ -92,8 +101,8 @@ public class TestLocalResourcesTrackerImpl {
       localrsrc.put(req1, lr1);
       localrsrc.put(req2, lr2);
       LocalResourcesTracker tracker =
-          new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, false,
-            conf);
+          new LocalResourcesTrackerImpl(user, null, dispatcher, localrsrc,
+              false, conf, new NMNullStateStoreService());
 
       ResourceEvent req11Event =
           new ResourceRequestEvent(req1, LocalResourceVisibility.PUBLIC, lc1);
@@ -176,7 +185,8 @@ public class TestLocalResourcesTrackerImpl {
       ConcurrentMap<LocalResourceRequest, LocalizedResource> localrsrc = new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>();
       localrsrc.put(req1, lr1);
       LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
-          dispatcher, localrsrc, false, conf);
+          null, dispatcher, localrsrc, false, conf,
+          new NMNullStateStoreService());
 
       ResourceEvent req11Event = new ResourceRequestEvent(req1,
           LocalResourceVisibility.PUBLIC, lc1);
@@ -246,7 +256,8 @@ public class TestLocalResourcesTrackerImpl {
       ConcurrentMap<LocalResourceRequest, LocalizedResource> localrsrc =
           new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>();
       LocalResourcesTracker tracker =
-          new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, true, conf);
+          new LocalResourcesTrackerImpl(user, null, dispatcher, localrsrc,
+              true, conf, new NMNullStateStoreService());
 
       LocalResourceRequest lr =
           createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.PUBLIC);
@@ -264,6 +275,7 @@ public class TestLocalResourcesTrackerImpl {
 
       // Container-1 requesting local resource.
       tracker.handle(reqEvent1);
+      dispatcher.await();
 
       // New localized Resource should have been added to local resource map
       // and the requesting container will be added to its waiting queue.
@@ -280,6 +292,7 @@ public class TestLocalResourcesTrackerImpl {
       ResourceEvent reqEvent2 =
           new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc2);
       tracker.handle(reqEvent2);
+      dispatcher.await();
 
       // Container 2 should have been added to the waiting queue of the local
       // resource
@@ -295,6 +308,7 @@ public class TestLocalResourcesTrackerImpl {
       LocalizedResource localizedResource = localrsrc.get(lr);
 
       tracker.handle(resourceFailedEvent);
+      dispatcher.await();
 
       // After receiving failed resource event; all waiting containers will be
       // notified with Container Resource Failed Event.
@@ -308,6 +322,7 @@ public class TestLocalResourcesTrackerImpl {
       // exception.
       ResourceReleaseEvent relEvent1 = new ResourceReleaseEvent(lr, cId1);
       tracker.handle(relEvent1);
+      dispatcher.await();
 
       // Container-3 now requests for the same resource. This request call
       // is coming prior to Container-2's release call.
@@ -316,6 +331,7 @@ public class TestLocalResourcesTrackerImpl {
       ResourceEvent reqEvent3 =
           new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc3);
       tracker.handle(reqEvent3);
+      dispatcher.await();
 
       // Local resource cache now should have the requested resource and the
       // number of waiting containers should be 1.
@@ -327,6 +343,7 @@ public class TestLocalResourcesTrackerImpl {
       // Container-2 Releases the resource
       ResourceReleaseEvent relEvent2 = new ResourceReleaseEvent(lr, cId2);
       tracker.handle(relEvent2);
+      dispatcher.await();
 
       // Making sure that there is no change in the cache after the release.
       Assert.assertEquals(1, localrsrc.size());
@@ -340,6 +357,7 @@ public class TestLocalResourcesTrackerImpl {
       ResourceLocalizedEvent localizedEvent =
       ResourceLocalizedEvent localizedEvent =
           new ResourceLocalizedEvent(lr, localizedPath, 123L);
           new ResourceLocalizedEvent(lr, localizedPath, 123L);
       tracker.handle(localizedEvent);
       tracker.handle(localizedEvent);
+      dispatcher.await();
       
       
       // Verifying ContainerResourceLocalizedEvent .
       // Verifying ContainerResourceLocalizedEvent .
       verify(containerEventHandler, times(1)).handle(
       verify(containerEventHandler, times(1)).handle(
@@ -351,6 +369,7 @@ public class TestLocalResourcesTrackerImpl {
       // Container-3 releasing the resource.
       // Container-3 releasing the resource.
       ResourceReleaseEvent relEvent3 = new ResourceReleaseEvent(lr, cId3);
       ResourceReleaseEvent relEvent3 = new ResourceReleaseEvent(lr, cId3);
       tracker.handle(relEvent3);
       tracker.handle(relEvent3);
+      dispatcher.await();
       
       
       Assert.assertEquals(0, localrsrc.get(lr).getRefCount());
       Assert.assertEquals(0, localrsrc.get(lr).getRefCount());
       
       
@@ -384,7 +403,8 @@ public class TestLocalResourcesTrackerImpl {
      ConcurrentMap<LocalResourceRequest, LocalizedResource> localrsrc =
          new ConcurrentHashMap<LocalResourceRequest, LocalizedResource>();
      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
-          dispatcher, localrsrc, true, conf);
+          null, dispatcher, localrsrc, true, conf,
+          new NMNullStateStoreService());

      // This is an arbitrary path. No file will actually be created here.
      Path localDir = new Path("/tmp");
@@ -401,7 +421,9 @@ public class TestLocalResourcesTrackerImpl {
      tracker.handle(reqEvent1);

      // Simulate the process of localization of lr1
-      Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir);
+      // NOTE: Localization path from tracker has resource ID at end
+      Path hierarchicalPath1 =
+          tracker.getPathForLocalization(lr1, localDir).getParent();
      // Simulate lr1 getting localized
      ResourceLocalizedEvent rle1 =
          new ResourceLocalizedEvent(lr1,
@@ -417,7 +439,8 @@ public class TestLocalResourcesTrackerImpl {
          new ResourceRequestEvent(lr2, LocalResourceVisibility.PUBLIC, lc1);
      tracker.handle(reqEvent2);

-      Path hierarchicalPath2 = tracker.getPathForLocalization(lr2, localDir);
+      Path hierarchicalPath2 =
+          tracker.getPathForLocalization(lr2, localDir).getParent();
      // localization failed.
      ResourceFailedLocalizationEvent rfe2 =
          new ResourceFailedLocalizationEvent(
@@ -435,7 +458,8 @@ public class TestLocalResourcesTrackerImpl {
      ResourceEvent reqEvent3 = new ResourceRequestEvent(lr3,
          LocalResourceVisibility.PUBLIC, lc1);
      tracker.handle(reqEvent3);
-      Path hierarchicalPath3 = tracker.getPathForLocalization(lr3, localDir);
+      Path hierarchicalPath3 =
+          tracker.getPathForLocalization(lr3, localDir).getParent();
      // localization successful
      ResourceLocalizedEvent rle3 =
          new ResourceLocalizedEvent(lr3, new Path(hierarchicalPath3.toUri()
@@ -479,6 +503,284 @@ public class TestLocalResourcesTrackerImpl {
    }
  }

+  @Test
+  @SuppressWarnings("unchecked")
+  public void testStateStoreSuccessfulLocalization() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // This is an arbitrary path. No file will actually be created here.
+    final Path localDir = new Path("/tmp");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    DeletionService mockDelService = mock(DeletionService.class);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class);
+
+    try {
+      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, false, conf, stateStore);
+      // Container 1 needs lr1 resource
+      ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.APPLICATION);
+      LocalizerContext lc1 = new LocalizerContext(user, cId1, null);
+
+      // Container 1 requests lr1 to be localized
+      ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1,
+          LocalResourceVisibility.APPLICATION, lc1);
+      tracker.handle(reqEvent1);
+      dispatcher.await();
+
+      // Simulate the process of localization of lr1
+      Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir);
+
+      ArgumentCaptor<LocalResourceProto> localResourceCaptor =
+          ArgumentCaptor.forClass(LocalResourceProto.class);
+      ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
+      verify(stateStore).startResourceLocalization(eq(user), eq(appId),
+          localResourceCaptor.capture(), pathCaptor.capture());
+      LocalResourceProto lrProto = localResourceCaptor.getValue();
+      Path localizedPath1 = pathCaptor.getValue();
+      Assert.assertEquals(lr1,
+          new LocalResourceRequest(new LocalResourcePBImpl(lrProto)));
+      Assert.assertEquals(hierarchicalPath1, localizedPath1.getParent());
+
+      // Simulate lr1 getting localized
+      ResourceLocalizedEvent rle1 =
+          new ResourceLocalizedEvent(lr1, pathCaptor.getValue(), 120);
+      tracker.handle(rle1);
+      dispatcher.await();
+
+      ArgumentCaptor<LocalizedResourceProto> localizedProtoCaptor =
+          ArgumentCaptor.forClass(LocalizedResourceProto.class);
+      verify(stateStore).finishResourceLocalization(eq(user), eq(appId),
+          localizedProtoCaptor.capture());
+      LocalizedResourceProto localizedProto = localizedProtoCaptor.getValue();
+      Assert.assertEquals(lr1, new LocalResourceRequest(
+          new LocalResourcePBImpl(localizedProto.getResource())));
+      Assert.assertEquals(localizedPath1.toString(),
+          localizedProto.getLocalPath());
+      LocalizedResource localizedRsrc1 = tracker.getLocalizedResource(lr1);
+      Assert.assertNotNull(localizedRsrc1);
+
+      // simulate release and retention processing
+      tracker.handle(new ResourceReleaseEvent(lr1, cId1));
+      dispatcher.await();
+      boolean removeResult = tracker.remove(localizedRsrc1, mockDelService);
+
+      Assert.assertTrue(removeResult);
+      verify(stateStore).removeLocalizedResource(eq(user), eq(appId),
+          eq(localizedPath1));
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testStateStoreFailedLocalization() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // This is an arbitrary path. No file will actually be created here.
+    final Path localDir = new Path("/tmp");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class);
+
+    try {
+      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, false, conf, stateStore);
+      // Container 1 needs lr1 resource
+      ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.APPLICATION);
+      LocalizerContext lc1 = new LocalizerContext(user, cId1, null);
+
+      // Container 1 requests lr1 to be localized
+      ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1,
+          LocalResourceVisibility.APPLICATION, lc1);
+      tracker.handle(reqEvent1);
+      dispatcher.await();
+
+      // Simulate the process of localization of lr1
+      Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir);
+
+      ArgumentCaptor<LocalResourceProto> localResourceCaptor =
+          ArgumentCaptor.forClass(LocalResourceProto.class);
+      ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
+      verify(stateStore).startResourceLocalization(eq(user), eq(appId),
+          localResourceCaptor.capture(), pathCaptor.capture());
+      LocalResourceProto lrProto = localResourceCaptor.getValue();
+      Path localizedPath1 = pathCaptor.getValue();
+      Assert.assertEquals(lr1,
+          new LocalResourceRequest(new LocalResourcePBImpl(lrProto)));
+      Assert.assertEquals(hierarchicalPath1, localizedPath1.getParent());
+
+      ResourceFailedLocalizationEvent rfe1 =
+          new ResourceFailedLocalizationEvent(
+              lr1, new Exception("Test").toString());
+      tracker.handle(rfe1);
+      dispatcher.await();
+      verify(stateStore).removeLocalizedResource(eq(user), eq(appId),
+          eq(localizedPath1));
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testRecoveredResource() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // This is an arbitrary path. No file will actually be created here.
+    final Path localDir = new Path("/tmp/localdir");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class);
+
+    try {
+      LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, false, conf, stateStore);
+      // Container 1 needs lr1 resource
+      ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.APPLICATION);
+      Assert.assertNull(tracker.getLocalizedResource(lr1));
+      final long localizedId1 = 52;
+      Path hierarchicalPath1 = new Path(localDir,
+          Long.toString(localizedId1));
+      Path localizedPath1 = new Path(hierarchicalPath1, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr1, localizedPath1, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr1));
+
+      // verify new paths reflect recovery of previous resources
+      LocalResourceRequest lr2 = createLocalResourceRequest(user, 2, 2,
+          LocalResourceVisibility.APPLICATION);
+      LocalizerContext lc2 = new LocalizerContext(user, cId1, null);
+      ResourceEvent reqEvent2 = new ResourceRequestEvent(lr2,
+          LocalResourceVisibility.APPLICATION, lc2);
+      tracker.handle(reqEvent2);
+      dispatcher.await();
+      Path hierarchicalPath2 = tracker.getPathForLocalization(lr2, localDir);
+      long localizedId2 = Long.parseLong(hierarchicalPath2.getName());
+      Assert.assertEquals(localizedId1 + 1, localizedId2);
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testRecoveredResourceWithDirCacheMgr() throws Exception {
+    final String user = "someuser";
+    final ApplicationId appId = ApplicationId.newInstance(1, 1);
+    // This is an arbitrary path. No file will actually be created here.
+    final Path localDirRoot = new Path("/tmp/localdir");
+    Configuration conf = new YarnConfiguration();
+    DrainDispatcher dispatcher = null;
+    dispatcher = createDispatcher(conf);
+    EventHandler<LocalizerEvent> localizerEventHandler =
+        mock(EventHandler.class);
+    EventHandler<LocalizerEvent> containerEventHandler =
+        mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerEventHandler);
+    dispatcher.register(ContainerEventType.class, containerEventHandler);
+    NMStateStoreService stateStore = mock(NMStateStoreService.class);
+
+    try {
+      LocalResourcesTrackerImpl tracker = new LocalResourcesTrackerImpl(user,
+          appId, dispatcher, true, conf, stateStore);
+      LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr1));
+      final long localizedId1 = 52;
+      Path hierarchicalPath1 = new Path(localDirRoot + "/4/2",
+          Long.toString(localizedId1));
+      Path localizedPath1 = new Path(hierarchicalPath1, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr1, localizedPath1, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr1));
+      LocalCacheDirectoryManager dirMgrRoot =
+          tracker.getDirectoryManager(localDirRoot);
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4/2").getCount());
+
+      LocalResourceRequest lr2 = createLocalResourceRequest(user, 2, 2,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr2));
+      final long localizedId2 = localizedId1 + 1;
+      Path hierarchicalPath2 = new Path(localDirRoot + "/4/2",
+          Long.toString(localizedId2));
+      Path localizedPath2 = new Path(hierarchicalPath2, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr2, localizedPath2, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr2));
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(2, dirMgrRoot.getDirectory("4/2").getCount());
+
+      LocalResourceRequest lr3 = createLocalResourceRequest(user, 3, 3,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr3));
+      final long localizedId3 = 128;
+      Path hierarchicalPath3 = new Path(localDirRoot + "/4/3",
+          Long.toString(localizedId3));
+      Path localizedPath3 = new Path(hierarchicalPath3, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr3, localizedPath3, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr3));
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(2, dirMgrRoot.getDirectory("4/2").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4/3").getCount());
+
+      LocalResourceRequest lr4 = createLocalResourceRequest(user, 4, 4,
+          LocalResourceVisibility.PUBLIC);
+      Assert.assertNull(tracker.getLocalizedResource(lr4));
+      final long localizedId4 = 256;
+      Path hierarchicalPath4 = new Path(localDirRoot + "/4",
+          Long.toString(localizedId4));
+      Path localizedPath4 = new Path(hierarchicalPath4, "resource.jar");
+      tracker.handle(new ResourceRecoveredEvent(lr4, localizedPath4, 120));
+      dispatcher.await();
+      Assert.assertNotNull(tracker.getLocalizedResource(lr4));
+      Assert.assertEquals(0, dirMgrRoot.getDirectory("").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4").getCount());
+      Assert.assertEquals(2, dirMgrRoot.getDirectory("4/2").getCount());
+      Assert.assertEquals(1, dirMgrRoot.getDirectory("4/3").getCount());
+    } finally {
+      if (dispatcher != null) {
+        dispatcher.stop();
+      }
+    }
+  }
+
  private boolean createdummylocalizefile(Path path) {
    boolean ret = false;
    File file = new File(path.toUri().getRawPath().toString());

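For orientation: the state-store tests added above pin down the persistence contract between LocalResourcesTrackerImpl and NMStateStoreService. The sketch below is illustrative only, assuming the same mocked collaborators the tests build (dispatcher, conf, delService, a request "req", a "containerId"); the "resource.jar" name and wiring are assumptions, not part of this patch:

    // Step 1: allocating a localization path records the resource in-progress
    // via stateStore.startResourceLocalization(user, appId, proto, localPath).
    NMStateStoreService stateStore = mock(NMStateStoreService.class);
    LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(
        user, appId, dispatcher, false, conf, stateStore);
    Path rsrcDir = tracker.getPathForLocalization(req, localDir);
    // The persisted path is a child of the returned directory.
    Path localPath = new Path(rsrcDir, "resource.jar");

    // Step 2: a ResourceLocalizedEvent moves it to the completed state via
    // stateStore.finishResourceLocalization(user, appId, localizedProto).
    tracker.handle(new ResourceLocalizedEvent(req, localPath, 120));

    // Step 3: release plus cache eviction (or a failed localization) removes
    // the record via stateStore.removeLocalizedResource(user, appId, localPath).
    tracker.handle(new ResourceReleaseEvent(req, containerId));
    tracker.remove(tracker.getLocalizedResource(req), delService);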
+ 270 - 14
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java

@@ -19,6 +19,8 @@
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer;

 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.anyBoolean;
 import static org.mockito.Matchers.anyInt;
@@ -120,6 +122,10 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.junit.After;
@@ -188,7 +194,8 @@ public class TestResourceLocalizationService {

     ResourceLocalizationService locService =
       spy(new ResourceLocalizationService(dispatcher, exec, delService,
-                                          diskhandler));
+                                          diskhandler,
+                                          new NMNullStateStoreService()));
     doReturn(lfs)
       .when(locService).getLocalFileContext(isA(Configuration.class));
     try {
@@ -253,7 +260,8 @@ public class TestResourceLocalizationService {

     ResourceLocalizationService rawService =
       new ResourceLocalizationService(dispatcher, exec, delService,
-                                      dirsHandler);
+                                      dirsHandler,
+                                      new NMNullStateStoreService());
     ResourceLocalizationService spyService = spy(rawService);
     doReturn(mockServer).when(spyService).createServer();
     doReturn(mockLocallilzerTracker).when(spyService).createLocalizerTracker(
@@ -287,7 +295,7 @@ public class TestResourceLocalizationService {
               user, appId);

       // init container.
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, user);

       // init resources
       Random r = new Random();
@@ -402,6 +410,233 @@ public class TestResourceLocalizationService {
     }
   }

+  @Test
+  @SuppressWarnings("unchecked") // mocked generics
+  public void testRecovery() throws Exception {
+    final String user1 = "user1";
+    final String user2 = "user2";
+    final ApplicationId appId1 = ApplicationId.newInstance(1, 1);
+    final ApplicationId appId2 = ApplicationId.newInstance(1, 2);
+
+    List<Path> localDirs = new ArrayList<Path>();
+    String[] sDirs = new String[4];
+    for (int i = 0; i < 4; ++i) {
+      localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
+      sDirs[i] = localDirs.get(i).toString();
+    }
+    conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+
+    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
+    stateStore.init(conf);
+    stateStore.start();
+    DrainDispatcher dispatcher = new DrainDispatcher();
+    dispatcher.init(conf);
+    dispatcher.start();
+    EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
+    dispatcher.register(ApplicationEventType.class, applicationBus);
+    EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
+    dispatcher.register(ContainerEventType.class, containerBus);
+    // Ignore actual localization
+    EventHandler<LocalizerEvent> localizerBus = mock(EventHandler.class);
+    dispatcher.register(LocalizerEventType.class, localizerBus);
+
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
+
+    ResourceLocalizationService spyService =
+        createSpyService(dispatcher, dirsHandler, stateStore);
+    try {
+      spyService.init(conf);
+      spyService.start();
+
+      final Application app1 = mock(Application.class);
+      when(app1.getUser()).thenReturn(user1);
+      when(app1.getAppId()).thenReturn(appId1);
+      final Application app2 = mock(Application.class);
+      when(app2.getUser()).thenReturn(user2);
+      when(app2.getAppId()).thenReturn(appId2);
+      spyService.handle(new ApplicationLocalizationEvent(
+          LocalizationEventType.INIT_APPLICATION_RESOURCES, app1));
+      spyService.handle(new ApplicationLocalizationEvent(
+          LocalizationEventType.INIT_APPLICATION_RESOURCES, app2));
+      dispatcher.await();
+
+      // Get a handle on the trackers after they're set up with INIT_APP_RESOURCES
+      LocalResourcesTracker appTracker1 =
+          spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user1, appId1);
+      LocalResourcesTracker privTracker1 =
+          spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE,
+              user1, null);
+      LocalResourcesTracker appTracker2 =
+          spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user2, appId2);
+      LocalResourcesTracker pubTracker =
+          spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC,
+              null, null);
+
+      // init containers
+      final Container c1 = getMockContainer(appId1, 1, user1);
+      final Container c2 = getMockContainer(appId2, 2, user2);
+
+      // init resources
+      Random r = new Random();
+      long seed = r.nextLong();
+      System.out.println("SEED: " + seed);
+      r.setSeed(seed);
+
+      // Send localization requests of each type.
+      final LocalResource privResource1 = getPrivateMockedResource(r);
+      final LocalResourceRequest privReq1 =
+          new LocalResourceRequest(privResource1);
+      final LocalResource privResource2 = getPrivateMockedResource(r);
+      final LocalResourceRequest privReq2 =
+          new LocalResourceRequest(privResource2);
+
+      final LocalResource pubResource1 = getPublicMockedResource(r);
+      final LocalResourceRequest pubReq1 =
+          new LocalResourceRequest(pubResource1);
+      final LocalResource pubResource2 = getPublicMockedResource(r);
+      final LocalResourceRequest pubReq2 =
+          new LocalResourceRequest(pubResource2);
+
+      final LocalResource appResource1 = getAppMockedResource(r);
+      final LocalResourceRequest appReq1 =
+          new LocalResourceRequest(appResource1);
+      final LocalResource appResource2 = getAppMockedResource(r);
+      final LocalResourceRequest appReq2 =
+          new LocalResourceRequest(appResource2);
+      final LocalResource appResource3 = getAppMockedResource(r);
+      final LocalResourceRequest appReq3 =
+          new LocalResourceRequest(appResource3);
+
+      Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req1 =
+          new HashMap<LocalResourceVisibility,
+                      Collection<LocalResourceRequest>>();
+      req1.put(LocalResourceVisibility.PRIVATE,
+          Arrays.asList(new LocalResourceRequest[] { privReq1, privReq2 }));
+      req1.put(LocalResourceVisibility.PUBLIC,
+          Collections.singletonList(pubReq1));
+      req1.put(LocalResourceVisibility.APPLICATION,
+          Collections.singletonList(appReq1));
+
+      Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req2 =
+        new HashMap<LocalResourceVisibility,
+                    Collection<LocalResourceRequest>>();
+      req2.put(LocalResourceVisibility.APPLICATION,
+          Arrays.asList(new LocalResourceRequest[] { appReq2, appReq3 }));
+      req2.put(LocalResourceVisibility.PUBLIC,
+          Collections.singletonList(pubReq2));
+
+      // Send Request event
+      spyService.handle(new ContainerLocalizationRequestEvent(c1, req1));
+      spyService.handle(new ContainerLocalizationRequestEvent(c2, req2));
+      dispatcher.await();
+
+      // Simulate start of localization for all resources
+      privTracker1.getPathForLocalization(privReq1,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.USERCACHE + user1));
+      privTracker1.getPathForLocalization(privReq2,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.USERCACHE + user1));
+      LocalizedResource privLr1 = privTracker1.getLocalizedResource(privReq1);
+      LocalizedResource privLr2 = privTracker1.getLocalizedResource(privReq2);
+      appTracker1.getPathForLocalization(appReq1,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.APPCACHE + appId1));
+      LocalizedResource appLr1 = appTracker1.getLocalizedResource(appReq1);
+      appTracker2.getPathForLocalization(appReq2,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.APPCACHE + appId2));
+      LocalizedResource appLr2 = appTracker2.getLocalizedResource(appReq2);
+      appTracker2.getPathForLocalization(appReq3,
+          dirsHandler.getLocalPathForWrite(
+              ContainerLocalizer.APPCACHE + appId2));
+      LocalizedResource appLr3 = appTracker2.getLocalizedResource(appReq3);
+      pubTracker.getPathForLocalization(pubReq1,
+          dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE));
+      LocalizedResource pubLr1 = pubTracker.getLocalizedResource(pubReq1);
+      pubTracker.getPathForLocalization(pubReq2,
+          dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE));
+      LocalizedResource pubLr2 = pubTracker.getLocalizedResource(pubReq2);
+
+      // Simulate completion of localization for most resources with
+      // possibly different sizes than in the request
+      assertNotNull("Localization not started", privLr1.getLocalPath());
+      privTracker1.handle(new ResourceLocalizedEvent(privReq1,
+          privLr1.getLocalPath(), privLr1.getSize() + 5));
+      assertNotNull("Localization not started", privLr2.getLocalPath());
+      privTracker1.handle(new ResourceLocalizedEvent(privReq2,
+          privLr2.getLocalPath(), privLr2.getSize() + 10));
+      assertNotNull("Localization not started", appLr1.getLocalPath());
+      appTracker1.handle(new ResourceLocalizedEvent(appReq1,
+          appLr1.getLocalPath(), appLr1.getSize()));
+      assertNotNull("Localization not started", appLr3.getLocalPath());
+      appTracker2.handle(new ResourceLocalizedEvent(appReq3,
+          appLr3.getLocalPath(), appLr3.getSize() + 7));
+      assertNotNull("Localization not started", pubLr1.getLocalPath());
+      pubTracker.handle(new ResourceLocalizedEvent(pubReq1,
+          pubLr1.getLocalPath(), pubLr1.getSize() + 1000));
+      assertNotNull("Localization not started", pubLr2.getLocalPath());
+      pubTracker.handle(new ResourceLocalizedEvent(pubReq2,
+          pubLr2.getLocalPath(), pubLr2.getSize() + 99999));
+
+      dispatcher.await();
+      assertEquals(ResourceState.LOCALIZED, privLr1.getState());
+      assertEquals(ResourceState.LOCALIZED, privLr2.getState());
+      assertEquals(ResourceState.LOCALIZED, appLr1.getState());
+      assertEquals(ResourceState.DOWNLOADING, appLr2.getState());
+      assertEquals(ResourceState.LOCALIZED, appLr3.getState());
+      assertEquals(ResourceState.LOCALIZED, pubLr1.getState());
+      assertEquals(ResourceState.LOCALIZED, pubLr2.getState());
+
+      // restart and recover
+      spyService = createSpyService(dispatcher, dirsHandler, stateStore);
+      spyService.init(conf);
+      spyService.recoverLocalizedResources(
+          stateStore.loadLocalizationState());
+      dispatcher.await();
+
+      appTracker1 = spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user1, appId1);
+      privTracker1 = spyService.getLocalResourcesTracker(
+          LocalResourceVisibility.PRIVATE, user1, null);
+      appTracker2 = spyService.getLocalResourcesTracker(
+              LocalResourceVisibility.APPLICATION, user2, appId2);
+      pubTracker = spyService.getLocalResourcesTracker(
+          LocalResourceVisibility.PUBLIC, null, null);
+
+      LocalizedResource recoveredRsrc =
+          privTracker1.getLocalizedResource(privReq1);
+      assertEquals(privReq1, recoveredRsrc.getRequest());
+      assertEquals(privLr1.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(privLr1.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+      recoveredRsrc = privTracker1.getLocalizedResource(privReq2);
+      assertEquals(privReq2, recoveredRsrc.getRequest());
+      assertEquals(privLr2.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(privLr2.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+      recoveredRsrc = appTracker1.getLocalizedResource(appReq1);
+      assertEquals(appReq1, recoveredRsrc.getRequest());
+      assertEquals(appLr1.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(appLr1.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+      recoveredRsrc = appTracker2.getLocalizedResource(appReq2);
+      assertNull("in-progress resource should not be present", recoveredRsrc);
+      recoveredRsrc = appTracker2.getLocalizedResource(appReq3);
+      assertEquals(appReq3, recoveredRsrc.getRequest());
+      assertEquals(appLr3.getLocalPath(), recoveredRsrc.getLocalPath());
+      assertEquals(appLr3.getSize(), recoveredRsrc.getSize());
+      assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
+    } finally {
+      dispatcher.stop();
+      stateStore.close();
+    }
+  }
+
  @Test( timeout = 10000)
  @SuppressWarnings("unchecked") // mocked generics
  public void testLocalizationHeartbeat() throws Exception {
@@ -436,7 +671,8 @@ public class TestResourceLocalizationService {

    ResourceLocalizationService rawService =
      new ResourceLocalizationService(dispatcher, exec, delService,
-                                      dirsHandler);
+                                      dirsHandler,
+                                      new NMNullStateStoreService());
    ResourceLocalizationService spyService = spy(rawService);
    doReturn(mockServer).when(spyService).createServer();
    doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
@@ -469,7 +705,7 @@ public class TestResourceLocalizationService {
      long seed = r.nextLong();
      System.out.println("SEED: " + seed);
      r.setSeed(seed);
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, "user0");
      FSDataOutputStream out =
        new FSDataOutputStream(new DataOutputBuffer(), null);
      doReturn(out).when(spylfs).createInternal(isA(Path.class),
@@ -616,7 +852,8 @@ public class TestResourceLocalizationService {
    try {
      ResourceLocalizationService rawService =
          new ResourceLocalizationService(dispatcher, exec, delService,
-                                        dirsHandler);
+                                        dirsHandler,
+                                        new NMNullStateStoreService());
      ResourceLocalizationService spyService = spy(rawService);
      doReturn(mockServer).when(spyService).createServer();
      doReturn(lfs).when(spyService).getLocalFileContext(
@@ -637,7 +874,7 @@ public class TestResourceLocalizationService {
      dispatcher.await();

      // init container.
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, user);

      // init resources
      Random r = new Random();
@@ -725,7 +962,7 @@ public class TestResourceLocalizationService {
    try {
      ResourceLocalizationService rawService =
          new ResourceLocalizationService(dispatcher, exec, delService,
-            dirsHandlerSpy);
+            dirsHandlerSpy, new NMNullStateStoreService());
      ResourceLocalizationService spyService = spy(rawService);
      doReturn(mockServer).when(spyService).createServer();
      doReturn(lfs).when(spyService).getLocalFileContext(
@@ -758,7 +995,7 @@ public class TestResourceLocalizationService {
        .put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq));

      // init container.
-      final Container c = getMockContainer(appId, 42);
+      final Container c = getMockContainer(appId, 42, user);

      // first test ioexception
      Mockito
@@ -838,7 +1075,7 @@ public class TestResourceLocalizationService {

      ResourceLocalizationService rls =
          new ResourceLocalizationService(dispatcher1, exec, delService,
-            localDirHandler);
+            localDirHandler, new NMNullStateStoreService());
      dispatcher1.register(LocalizationEventType.class, rls);
      rls.init(conf);

@@ -991,7 +1228,7 @@ public class TestResourceLocalizationService {

      ResourceLocalizationService rls =
          new ResourceLocalizationService(dispatcher1, exec, delService,
-            localDirHandler);
+            localDirHandler, new NMNullStateStoreService());
      dispatcher1.register(LocalizationEventType.class, rls);
      rls.init(conf);

@@ -1157,7 +1394,7 @@ public class TestResourceLocalizationService {
      // it as otherwise it will remove requests from pending queue.
      ResourceLocalizationService rawService =
          new ResourceLocalizationService(dispatcher1, exec, delService,
-            dirsHandler);
+            dirsHandler, new NMNullStateStoreService());
      ResourceLocalizationService spyService = spy(rawService);
      dispatcher1.register(LocalizationEventType.class, spyService);
      spyService.init(conf);
@@ -1424,12 +1661,13 @@ public class TestResourceLocalizationService {
    return getMockedResource(r, LocalResourceVisibility.PRIVATE);
  }

-  private static Container getMockContainer(ApplicationId appId, int id) {
+  private static Container getMockContainer(ApplicationId appId, int id,
+      String user) {
    Container c = mock(Container.class);
    ApplicationAttemptId appAttemptId =
        BuilderUtils.newApplicationAttemptId(appId, 1);
    ContainerId cId = BuilderUtils.newContainerId(appAttemptId, id);
-    when(c.getUser()).thenReturn("user0");
+    when(c.getUser()).thenReturn(user);
    when(c.getContainerId()).thenReturn(cId);
    Credentials creds = new Credentials();
    creds.addToken(new Text("tok" + id), getToken(id));
@@ -1438,6 +1676,24 @@ public class TestResourceLocalizationService {
    return c;
  }

+  private ResourceLocalizationService createSpyService(
+      DrainDispatcher dispatcher, LocalDirsHandlerService dirsHandler,
+      NMStateStoreService stateStore) {
+    ContainerExecutor exec = mock(ContainerExecutor.class);
+    LocalizerTracker mockLocalizerTracker = mock(LocalizerTracker.class);
+    DeletionService delService = mock(DeletionService.class);
+    ResourceLocalizationService rawService =
+      new ResourceLocalizationService(dispatcher, exec, delService,
+                                      dirsHandler, stateStore);
+    ResourceLocalizationService spyService = spy(rawService);
+    doReturn(mockServer).when(spyService).createServer();
+    doReturn(mockLocalizerTracker).when(spyService).createLocalizerTracker(
+        isA(Configuration.class));
+    doReturn(lfs).when(spyService)
+        .getLocalFileContext(isA(Configuration.class));
+    return spyService;
+  }
+
  @SuppressWarnings({ "unchecked", "rawtypes" })
  static Token<? extends TokenIdentifier> getToken(int id) {
    return new Token(("ident" + id).getBytes(), ("passwd" + id).getBytes(),

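The new testRecovery case above is the end-to-end check: state written through one ResourceLocalizationService instance must rebuild equivalent trackers in a fresh instance. Condensed to its skeleton (a hedged restatement of the test flow, with the surrounding setup assumed):

    // Persist localization state while the first service instance runs.
    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.init(conf);
    stateStore.start();
    ResourceLocalizationService service =
        createSpyService(dispatcher, dirsHandler, stateStore);
    service.init(conf);
    service.start();
    // ... request and complete localizations; only completed resources are
    // expected to survive, in-progress ones are dropped on recovery.

    // Simulate an NM restart: a fresh instance recovers from the same store.
    service = createSpyService(dispatcher, dirsHandler, stateStore);
    service.init(conf);
    service.recoverLocalizedResources(stateStore.loadLocalizationState());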
+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java

@@ -26,11 +26,13 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
-
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.junit.Test;
+
 import static org.junit.Assert.*;

 import org.mockito.ArgumentCaptor;
+
 import static org.mockito.Mockito.*;

 public class TestResourceRetention {
@@ -81,7 +83,7 @@ public class TestResourceRetention {
     ConcurrentMap<LocalResourceRequest,LocalizedResource> trackerResources =
       new ConcurrentHashMap<LocalResourceRequest,LocalizedResource>();
     LocalResourcesTracker ret = spy(new LocalResourcesTrackerImpl(user, null,
-          trackerResources, false, conf));
+      null, trackerResources, false, conf, new NMNullStateStoreService()));
     for (int i = 0; i < nRsrcs; ++i) {
       final LocalResourceRequest req = new LocalResourceRequest(
           new Path("file:///" + user + "/rsrc" + i), timestamp + i * tsstep,

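TestResourceRetention only needed the widened constructor: every LocalResourcesTrackerImpl now carries a state store. The pattern across these tests, as a sketch under assumed wiring (user, appId, dispatcher, localrsrc, conf come from the enclosing test), is NMNullStateStoreService wherever persistence is irrelevant and a mock wherever the persistence calls are themselves asserted:

    // Persistence irrelevant: the null store discards every write.
    LocalResourcesTracker plain = new LocalResourcesTrackerImpl(user, null,
        dispatcher, localrsrc, false, conf, new NMNullStateStoreService());

    // Persistence under test: capture and verify the writes on a mock,
    // e.g. after a getPathForLocalization(...) call:
    NMStateStoreService stateStore = mock(NMStateStoreService.class);
    LocalResourcesTracker tracked = new LocalResourcesTrackerImpl(user, appId,
        dispatcher, false, conf, stateStore);
    verify(stateStore).startResourceLocalization(eq(user), eq(appId),
        any(LocalResourceProto.class), any(Path.class));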
+ 171 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java

@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+
+public class NMMemoryStateStoreService extends NMStateStoreService {
+  private Map<TrackerKey, TrackerState> trackerStates;
+
+  public NMMemoryStateStoreService() {
+    super(NMMemoryStateStoreService.class.getName());
+  }
+
+  private LocalResourceTrackerState loadTrackerState(TrackerState ts) {
+    LocalResourceTrackerState result = new LocalResourceTrackerState();
+    result.localizedResources.addAll(ts.localizedResources.values());
+    for (Map.Entry<Path, LocalResourceProto> entry :
+         ts.inProgressMap.entrySet()) {
+      result.inProgressResources.put(entry.getValue(), entry.getKey());
+    }
+    return result;
+  }
+
+  private TrackerState getTrackerState(TrackerKey key) {
+    TrackerState ts = trackerStates.get(key);
+    if (ts == null) {
+      ts = new TrackerState();
+      trackerStates.put(key, ts);
+    }
+    return ts;
+  }
+
+  @Override
+  public synchronized RecoveredLocalizationState loadLocalizationState() {
+    RecoveredLocalizationState result = new RecoveredLocalizationState();
+    for (Map.Entry<TrackerKey, TrackerState> e : trackerStates.entrySet()) {
+      TrackerKey tk = e.getKey();
+      TrackerState ts = e.getValue();
+      // check what kind of tracker state we have and recover appropriately
+      // public trackers have user == null
+      // private trackers have a valid user but appId == null
+      // app-specific trackers have a valid user and valid appId
+      if (tk.user == null) {
+        result.publicTrackerState = loadTrackerState(ts);
+      } else {
+        RecoveredUserResources rur = result.userResources.get(tk.user);
+        if (rur == null) {
+          rur = new RecoveredUserResources();
+          result.userResources.put(tk.user, rur);
+        }
+        if (tk.appId == null) {
+          rur.privateTrackerState = loadTrackerState(ts);
+        } else {
+          rur.appTrackerStates.put(tk.appId, loadTrackerState(ts));
+        }
+      }
+    }
+    return result;
+  }
+
+  @Override
+  public synchronized void startResourceLocalization(String user,
+      ApplicationId appId, LocalResourceProto proto, Path localPath) {
+    TrackerState ts = getTrackerState(new TrackerKey(user, appId));
+    ts.inProgressMap.put(localPath, proto);
+  }
+
+  @Override
+  public synchronized void finishResourceLocalization(String user,
+      ApplicationId appId, LocalizedResourceProto proto) {
+    TrackerState ts = getTrackerState(new TrackerKey(user, appId));
+    Path localPath = new Path(proto.getLocalPath());
+    ts.inProgressMap.remove(localPath);
+    ts.localizedResources.put(localPath, proto);
+  }
+
+  @Override
+  public synchronized void removeLocalizedResource(String user,
+      ApplicationId appId, Path localPath) {
+    TrackerState ts = trackerStates.get(new TrackerKey(user, appId));
+    if (ts != null) {
+      ts.inProgressMap.remove(localPath);
+      ts.localizedResources.remove(localPath);
+    }
+  }
+
+  @Override
+  protected void initStorage(Configuration conf) {
+    trackerStates = new HashMap<TrackerKey, TrackerState>();
+  }
+
+  @Override
+  protected void startStorage() {
+  }
+
+  @Override
+  protected void closeStorage() {
+  }
+
+
+  private static class TrackerState {
+    Map<Path, LocalResourceProto> inProgressMap =
+        new HashMap<Path, LocalResourceProto>();
+    Map<Path, LocalizedResourceProto> localizedResources =
+        new HashMap<Path, LocalizedResourceProto>();
+  }
+
+  private static class TrackerKey {
+    String user;
+    ApplicationId appId;
+
+    public TrackerKey(String user, ApplicationId appId) {
+      this.user = user;
+      this.appId = appId;
+    }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + ((appId == null) ? 0 : appId.hashCode());
+      result = prime * result + ((user == null) ? 0 : user.hashCode());
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj)
+        return true;
+      if (obj == null)
+        return false;
+      if (!(obj instanceof TrackerKey))
+        return false;
+      TrackerKey other = (TrackerKey) obj;
+      if (appId == null) {
+        if (other.appId != null)
+          return false;
+      } else if (!appId.equals(other.appId))
+        return false;
+      if (user == null) {
+        if (other.user != null)
+          return false;
+      } else if (!user.equals(other.user))
+        return false;
+      return true;
+    }
+  }
+}

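NMMemoryStateStoreService above is the in-memory test double used by testRecovery: real load/save semantics with no on-disk footprint, keyed per (user, appId) tracker. A minimal hedged usage sketch; user, appId, proto, localizedProto, and localPath are placeholders for values a test would construct:

    NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.init(new YarnConfiguration());  // initStorage() builds the map
    stateStore.start();
    // An in-progress entry appears under the (user, appId) tracker key.
    stateStore.startResourceLocalization(user, appId, proto, localPath);
    // Finishing moves the entry from inProgressMap to localizedResources;
    // localizedProto.getLocalPath() must name the same path.
    stateStore.finishResourceLocalization(user, appId, localizedProto);
    RecoveredLocalizationState state = stateStore.loadLocalizationState();
    stateStore.close();

The leveldb-backed tests that follow exercise the same NMStateStoreService surface against the real persistent implementation.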
+ 407 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java

@@ -0,0 +1,407 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.recovery;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
+import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.LocalizedResourceProto;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.LocalResourceTrackerState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredLocalizationState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredUserResources;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestNMLeveldbStateStoreService {
+  private static final File TMP_DIR = new File(
+      System.getProperty("test.build.data",
+          System.getProperty("java.io.tmpdir")),
+      TestNMLeveldbStateStoreService.class.getName());
+
+  YarnConfiguration conf;
+  NMLeveldbStateStoreService stateStore;
+
+  @Before
+  public void setup() throws IOException {
+    FileUtil.fullyDelete(TMP_DIR);
+    conf = new YarnConfiguration();
+    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+    conf.set(YarnConfiguration.NM_RECOVERY_DIR, TMP_DIR.toString());
+    restartStateStore();
+  }
+
+  @After
+  public void cleanup() throws IOException {
+    if (stateStore != null) {
+      stateStore.close();
+    }
+    FileUtil.fullyDelete(TMP_DIR);
+  }
+
+  private void restartStateStore() throws IOException {
+    // need to close so leveldb releases database lock
+    if (stateStore != null) {
+      stateStore.close();
+    }
+    stateStore = new NMLeveldbStateStoreService();
+    stateStore.init(conf);
+    stateStore.start();
+  }
+
+  private void verifyEmptyState() throws IOException {
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    assertNotNull(state);
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertNotNull(pubts);
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    assertTrue(state.getUserResources().isEmpty());
+  }
+
+  @Test
+  public void testEmptyState() throws IOException {
+    assertTrue(stateStore.canRecover());
+    verifyEmptyState();
+  }
+
+  @Test
+  public void testStartResourceLocalization() throws IOException {
+    String user = "somebody";
+    ApplicationId appId = ApplicationId.newInstance(1, 1);
+
+    // start a local resource for an application
+    Path appRsrcPath = new Path("hdfs://some/app/resource");
+    LocalResourcePBImpl rsrcPb = (LocalResourcePBImpl)
+        LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(appRsrcPath),
+            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION,
+            123L, 456L);
+    LocalResourceProto appRsrcProto = rsrcPb.getProto();
+    Path appRsrcLocalPath = new Path("/some/local/dir/for/apprsrc");
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+
+    // restart and verify only app resource is marked in-progress
+    restartStateStore();
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    Map<String, RecoveredUserResources> userResources =
+        state.getUserResources();
+    assertEquals(1, userResources.size());
+    RecoveredUserResources rur = userResources.get(user);
+    LocalResourceTrackerState privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertTrue(privts.getLocalizedResources().isEmpty());
+    assertTrue(privts.getInProgressResources().isEmpty());
+    assertEquals(1, rur.getAppTrackerStates().size());
+    LocalResourceTrackerState appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getLocalizedResources().isEmpty());
+    assertEquals(1, appts.getInProgressResources().size());
+    assertEquals(appRsrcLocalPath,
+        appts.getInProgressResources().get(appRsrcProto));
+
+    // start some public and private resources
+    Path pubRsrcPath1 = new Path("hdfs://some/public/resource1");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath1),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto1 = rsrcPb.getProto();
+    Path pubRsrcLocalPath1 = new Path("/some/local/dir/for/pubrsrc1");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto1,
+        pubRsrcLocalPath1);
+    Path pubRsrcPath2 = new Path("hdfs://some/public/resource2");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath2),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto2 = rsrcPb.getProto();
+    Path pubRsrcLocalPath2 = new Path("/some/local/dir/for/pubrsrc2");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto2,
+        pubRsrcLocalPath2);
+    Path privRsrcPath = new Path("hdfs://some/private/resource");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(privRsrcPath),
+            LocalResourceType.PATTERN, LocalResourceVisibility.PRIVATE,
+            789L, 680L, "*pattern*");
+    LocalResourceProto privRsrcProto = rsrcPb.getProto();
+    Path privRsrcLocalPath = new Path("/some/local/dir/for/privrsrc");
+    stateStore.startResourceLocalization(user, null, privRsrcProto,
+        privRsrcLocalPath);
+
+    // restart and verify resources are marked in-progress
+    restartStateStore();
+    state = stateStore.loadLocalizationState();
+    pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertEquals(2, pubts.getInProgressResources().size());
+    assertEquals(pubRsrcLocalPath1,
+        pubts.getInProgressResources().get(pubRsrcProto1));
+    assertEquals(pubRsrcLocalPath2,
+        pubts.getInProgressResources().get(pubRsrcProto2));
+    userResources = state.getUserResources();
+    assertEquals(1, userResources.size());
+    rur = userResources.get(user);
+    privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertTrue(privts.getLocalizedResources().isEmpty());
+    assertEquals(1, privts.getInProgressResources().size());
+    assertEquals(privRsrcLocalPath,
+        privts.getInProgressResources().get(privRsrcProto));
+    assertEquals(1, rur.getAppTrackerStates().size());
+    appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getLocalizedResources().isEmpty());
+    assertEquals(1, appts.getInProgressResources().size());
+    assertEquals(appRsrcLocalPath,
+        appts.getInProgressResources().get(appRsrcProto));
+  }
+
+  @Test
+  public void testFinishResourceLocalization() throws IOException {
+    String user = "somebody";
+    ApplicationId appId = ApplicationId.newInstance(1, 1);
+
+    // start and finish a local resource for an application
+    Path appRsrcPath = new Path("hdfs://some/app/resource");
+    LocalResourcePBImpl rsrcPb = (LocalResourcePBImpl)
+        LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(appRsrcPath),
+            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION,
+            123L, 456L);
+    LocalResourceProto appRsrcProto = rsrcPb.getProto();
+    Path appRsrcLocalPath = new Path("/some/local/dir/for/apprsrc");
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+    LocalizedResourceProto appLocalizedProto =
+        LocalizedResourceProto.newBuilder()
+          .setResource(appRsrcProto)
+          .setLocalPath(appRsrcLocalPath.toString())
+          .setSize(1234567L)
+          .build();
+    stateStore.finishResourceLocalization(user, appId, appLocalizedProto);
+
+    // restart and verify only app resource is completed
+    restartStateStore();
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getLocalizedResources().isEmpty());
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    Map<String, RecoveredUserResources> userResources =
+        state.getUserResources();
+    assertEquals(1, userResources.size());
+    RecoveredUserResources rur = userResources.get(user);
+    LocalResourceTrackerState privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertTrue(privts.getLocalizedResources().isEmpty());
+    assertTrue(privts.getInProgressResources().isEmpty());
+    assertEquals(1, rur.getAppTrackerStates().size());
+    LocalResourceTrackerState appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getInProgressResources().isEmpty());
+    assertEquals(1, appts.getLocalizedResources().size());
+    assertEquals(appLocalizedProto,
+        appts.getLocalizedResources().iterator().next());
+
+    // start some public and private resources
+    Path pubRsrcPath1 = new Path("hdfs://some/public/resource1");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath1),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto1 = rsrcPb.getProto();
+    Path pubRsrcLocalPath1 = new Path("/some/local/dir/for/pubrsrc1");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto1,
+        pubRsrcLocalPath1);
+    Path pubRsrcPath2 = new Path("hdfs://some/public/resource2");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath2),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto2 = rsrcPb.getProto();
+    Path pubRsrcLocalPath2 = new Path("/some/local/dir/for/pubrsrc2");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto2,
+        pubRsrcLocalPath2);
+    Path privRsrcPath = new Path("hdfs://some/private/resource");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(privRsrcPath),
+            LocalResourceType.PATTERN, LocalResourceVisibility.PRIVATE,
+            789L, 680L, "*pattern*");
+    LocalResourceProto privRsrcProto = rsrcPb.getProto();
+    Path privRsrcLocalPath = new Path("/some/local/dir/for/privrsrc");
+    stateStore.startResourceLocalization(user, null, privRsrcProto,
+        privRsrcLocalPath);
+
+    // finish some of the resources
+    LocalizedResourceProto pubLocalizedProto1 =
+        LocalizedResourceProto.newBuilder()
+          .setResource(pubRsrcProto1)
+          .setLocalPath(pubRsrcLocalPath1.toString())
+          .setSize(pubRsrcProto1.getSize())
+          .build();
+    stateStore.finishResourceLocalization(null, null, pubLocalizedProto1);
+    LocalizedResourceProto privLocalizedProto =
+        LocalizedResourceProto.newBuilder()
+          .setResource(privRsrcProto)
+          .setLocalPath(privRsrcLocalPath.toString())
+          .setSize(privRsrcProto.getSize())
+          .build();
+    stateStore.finishResourceLocalization(user, null, privLocalizedProto);
+
+    // restart and verify state
+    restartStateStore();
+    state = stateStore.loadLocalizationState();
+    pubts = state.getPublicTrackerState();
+    assertEquals(1, pubts.getLocalizedResources().size());
+    assertEquals(pubLocalizedProto1,
+        pubts.getLocalizedResources().iterator().next());
+    assertEquals(1, pubts.getInProgressResources().size());
+    assertEquals(pubRsrcLocalPath2,
+        pubts.getInProgressResources().get(pubRsrcProto2));
+    userResources = state.getUserResources();
+    assertEquals(1, userResources.size());
+    rur = userResources.get(user);
+    privts = rur.getPrivateTrackerState();
+    assertNotNull(privts);
+    assertEquals(1, privts.getLocalizedResources().size());
+    assertEquals(privLocalizedProto,
+        privts.getLocalizedResources().iterator().next());
+    assertTrue(privts.getInProgressResources().isEmpty());
+    assertEquals(1, rur.getAppTrackerStates().size());
+    appts = rur.getAppTrackerStates().get(appId);
+    assertNotNull(appts);
+    assertTrue(appts.getInProgressResources().isEmpty());
+    assertEquals(1, appts.getLocalizedResources().size());
+    assertEquals(appLocalizedProto,
+        appts.getLocalizedResources().iterator().next());
+  }
+
+  @Test
+  public void testRemoveLocalizedResource() throws IOException {
+    String user = "somebody";
+    ApplicationId appId = ApplicationId.newInstance(1, 1);
+
+    // go through the complete lifecycle for an application local resource
+    Path appRsrcPath = new Path("hdfs://some/app/resource");
+    LocalResourcePBImpl rsrcPb = (LocalResourcePBImpl)
+        LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(appRsrcPath),
+            LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION,
+            123L, 456L);
+    LocalResourceProto appRsrcProto = rsrcPb.getProto();
+    Path appRsrcLocalPath = new Path("/some/local/dir/for/apprsrc");
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+    LocalizedResourceProto appLocalizedProto =
+        LocalizedResourceProto.newBuilder()
+          .setResource(appRsrcProto)
+          .setLocalPath(appRsrcLocalPath.toString())
+          .setSize(1234567L)
+          .build();
+    stateStore.finishResourceLocalization(user, appId, appLocalizedProto);
+    stateStore.removeLocalizedResource(user, appId, appRsrcLocalPath);
+
+    restartStateStore();
+    verifyEmptyState();
+
+    // remove an app resource that didn't finish
+    stateStore.startResourceLocalization(user, appId, appRsrcProto,
+        appRsrcLocalPath);
+    stateStore.removeLocalizedResource(user, appId, appRsrcLocalPath);
+
+    restartStateStore();
+    verifyEmptyState();
+
+    // add public and private resources and remove some
+    Path pubRsrcPath1 = new Path("hdfs://some/public/resource1");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath1),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto1 = rsrcPb.getProto();
+    Path pubRsrcLocalPath1 = new Path("/some/local/dir/for/pubrsrc1");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto1,
+        pubRsrcLocalPath1);
+    LocalizedResourceProto pubLocalizedProto1 =
+        LocalizedResourceProto.newBuilder()
+          .setResource(pubRsrcProto1)
+          .setLocalPath(pubRsrcLocalPath1.toString())
+          .setSize(789L)
+          .build();
+    stateStore.finishResourceLocalization(null, null, pubLocalizedProto1);
+    Path pubRsrcPath2 = new Path("hdfs://some/public/resource2");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(pubRsrcPath2),
+            LocalResourceType.FILE, LocalResourceVisibility.PUBLIC,
+            789L, 135L);
+    LocalResourceProto pubRsrcProto2 = rsrcPb.getProto();
+    Path pubRsrcLocalPath2 = new Path("/some/local/dir/for/pubrsrc2");
+    stateStore.startResourceLocalization(null, null, pubRsrcProto2,
+        pubRsrcLocalPath2);
+    LocalizedResourceProto pubLocalizedProto2 =
+        LocalizedResourceProto.newBuilder()
+          .setResource(pubRsrcProto2)
+          .setLocalPath(pubRsrcLocalPath2.toString())
+          .setSize(7654321L)
+          .build();
+    stateStore.finishResourceLocalization(null, null, pubLocalizedProto2);
+    stateStore.removeLocalizedResource(null, null, pubRsrcLocalPath2);
+    Path privRsrcPath = new Path("hdfs://some/private/resource");
+    rsrcPb = (LocalResourcePBImpl) LocalResource.newInstance(
+            ConverterUtils.getYarnUrlFromPath(privRsrcPath),
+            LocalResourceType.PATTERN, LocalResourceVisibility.PRIVATE,
+            789L, 680L, "*pattern*");
+    LocalResourceProto privRsrcProto = rsrcPb.getProto();
+    Path privRsrcLocalPath = new Path("/some/local/dir/for/privrsrc");
+    stateStore.startResourceLocalization(user, null, privRsrcProto,
+        privRsrcLocalPath);
+    stateStore.removeLocalizedResource(user, null, privRsrcLocalPath);
+
+    // restart and verify state
+    restartStateStore();
+    RecoveredLocalizationState state = stateStore.loadLocalizationState();
+    LocalResourceTrackerState pubts = state.getPublicTrackerState();
+    assertTrue(pubts.getInProgressResources().isEmpty());
+    assertEquals(1, pubts.getLocalizedResources().size());
+    assertEquals(pubLocalizedProto1,
+        pubts.getLocalizedResources().iterator().next());
+    Map<String, RecoveredUserResources> userResources =
+        state.getUserResources();
+    assertTrue(userResources.isEmpty());
+  }
+}

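For context on the store exercised by the test above, here is a minimal sketch of driving NMLeveldbStateStoreService outside the test harness, mirroring setup() and restartStateStore(); the recovery directory path is illustrative only:

    // Enable NM recovery and open the leveldb-backed store (sketch).
    YarnConfiguration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
    conf.set(YarnConfiguration.NM_RECOVERY_DIR, "/var/lib/hadoop-yarn/nm-recovery"); // illustrative path
    NMLeveldbStateStoreService store = new NMLeveldbStateStoreService();
    store.init(conf);
    store.start();
    try {
      // Recovered localization state splits into a public tracker plus
      // per-user private and per-application trackers, as the assertions above show.
      RecoveredLocalizationState state = store.loadLocalizationState();
      System.out.println("users with resources: " + state.getUserResources().keySet());
    } finally {
      store.close(); // close so leveldb releases its database lock
    }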
+ 3 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
 import org.apache.hadoop.yarn.server.nodemanager.webapp.ContainerLogsPage.ContainersLogsBlock;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -77,7 +78,8 @@ public class TestContainerLogsPage {
     NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
     healthChecker.init(conf);
     LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
-    NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, new ApplicationACLsManager(conf));
+    NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+        new ApplicationACLsManager(conf), new NMNullStateStoreService());
     // Add an application and the corresponding containers
     RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(conf);
     String user = "nobody";

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java

@@ -49,6 +49,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
 import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -77,7 +79,8 @@ public class TestNMWebServer {
   }
 
   private int startNMWebAppServer(String webAddr) {
-    Context nmContext = new NodeManager.NMContext(null, null, null, null);
+    Context nmContext = new NodeManager.NMContext(null, null, null, null,
+        null);
     ResourceView resourceView = new ResourceView() {
       @Override
       public long getVmemAllocatedForContainers() {
@@ -135,7 +138,8 @@
 
   @Test
   public void testNMWebApp() throws IOException, YarnException {
-    Context nmContext = new NodeManager.NMContext(null, null, null, null);
+    Context nmContext = new NodeManager.NMContext(null, null, null, null,
+        null);
     ResourceView resourceView = new ResourceView() {
       @Override
       public long getVmemAllocatedForContainers() {
@@ -185,6 +189,7 @@
     ContainerId container2 =
         BuilderUtils.newContainerId(recordFactory, appId, appAttemptId, 1);
     NodeManagerMetrics metrics = mock(NodeManagerMetrics.class);
+    NMStateStoreService stateStore = new NMNullStateStoreService();
     for (ContainerId containerId : new ContainerId[] { container1,
         container2}) {
       // TODO: Use builder utils

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServices.java

@@ -107,7 +107,8 @@ public class TestNMWebServices extends JerseyTest {
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
-      nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager);
+      nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+          aclsManager, null);
       NodeId nodeId = NodeId.newInstance("testhost.foo.com", 8042);
       ((NodeManager.NMContext)nmContext).setNodeId(nodeId);
       resourceView = new ResourceView() {

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesApps.java

@@ -99,7 +99,8 @@ public class TestNMWebServicesApps extends JerseyTest {
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
-      nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager);
+      nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+          aclsManager, null);
       NodeId nodeId = NodeId.newInstance("testhost.foo.com", 9999);
       ((NodeManager.NMContext)nmContext).setNodeId(nodeId);
       resourceView = new ResourceView() {

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServicesContainers.java

@@ -122,7 +122,8 @@ public class TestNMWebServicesContainers extends JerseyTest {
       healthChecker.init(conf);
       dirsHandler = healthChecker.getDiskHandler();
       aclsManager = new ApplicationACLsManager(conf);
-      nmContext = new NodeManager.NMContext(null, null, dirsHandler, aclsManager) {
+      nmContext = new NodeManager.NMContext(null, null, dirsHandler,
+          aclsManager, null) {
         public NodeId getNodeId() {
           return NodeId.newInstance("testhost.foo.com", 8042);
         };

+ 11 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java

@@ -90,7 +90,9 @@ public class ZKRMStateStore extends RMStateStore {
 
   private String zkHostPort = null;
   private int zkSessionTimeout;
-  private long zkRetryInterval;
+
+  @VisibleForTesting
+  long zkRetryInterval;
   private List<ACL> zkAcl;
   private List<ZKUtil.ZKAuthInfo> zkAuths;
 
@@ -199,9 +201,14 @@
     zkSessionTimeout =
         conf.getInt(YarnConfiguration.RM_ZK_TIMEOUT_MS,
            YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS);
-    zkRetryInterval =
-        conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
-          YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
+
+    if (HAUtil.isHAEnabled(conf)) {
+      zkRetryInterval = zkSessionTimeout / numRetries;
+    } else {
+      zkRetryInterval =
+          conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
+              YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
+    }
 
     zkAcl = RMZKUtils.getZKAcls(conf);
     zkAuths = RMZKUtils.getZKAuths(conf);
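
A note on the HA branch above: rather than honoring a fixed RM_ZK_RETRY_INTERVAL_MS, the store divides the ZK session timeout evenly across the configured retry count, so all retries complete within one session. A sketch of the arithmetic, using hypothetical numbers rather than the actual defaults of any particular release:

    // Hypothetical values for illustration; read the real ones from
    // YarnConfiguration (RM_ZK_TIMEOUT_MS and the ZK retry-count setting).
    int zkSessionTimeout = 10000; // ms
    int numRetries = 1000;
    long zkRetryInterval = zkSessionTimeout / numRetries; // 10 ms between retries

This is exactly what the new testZKRetryInterval below asserts: DEFAULT_RM_ZK_TIMEOUT_MS / DEFAULT_ZK_RM_NUM_RETRIES when HA is enabled, and DEFAULT_RM_ZK_RETRY_INTERVAL_MS otherwise.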

+ 23 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java

@@ -41,6 +41,7 @@ import java.security.NoSuchAlgorithmException;
 import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -203,7 +204,7 @@ public class TestZKRMStateStoreZKClientConnections extends
       LOG.error(error, e);
       fail(error);
     }
-    Assert.assertEquals("newBytes", new String(ret));
+    assertEquals("newBytes", new String(ret));
   }
 
   @Test(timeout = 20000)
@@ -232,7 +233,7 @@
 
     try {
       byte[] ret = store.getDataWithRetries(path, false);
-      Assert.assertEquals("bytes", new String(ret));
+      assertEquals("bytes", new String(ret));
     } catch (Exception e) {
       String error = "New session creation failed";
       LOG.error(error, e);
@@ -281,4 +282,24 @@
 
     zkClientTester.getRMStateStore(conf);
   }
+
+  @Test
+  public void testZKRetryInterval() throws Exception {
+    TestZKClient zkClientTester = new TestZKClient();
+    YarnConfiguration conf = new YarnConfiguration();
+
+    ZKRMStateStore store =
+        (ZKRMStateStore) zkClientTester.getRMStateStore(conf);
+    assertEquals(YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS,
+        store.zkRetryInterval);
+    store.stop();
+
+    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
+    store =
+        (ZKRMStateStore) zkClientTester.getRMStateStore(conf);
+    assertEquals(YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS /
+            YarnConfiguration.DEFAULT_ZK_RM_NUM_RETRIES,
+        store.zkRetryInterval);
+    store.stop();
+  }
 }