Browse Source

Merge trunk into HA branch.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1208644 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 13 years ago
parent
commit
0eec2218a1
81 changed files with 1669 additions and 562 deletions
  1. 6 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 56 2
      hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh
  3. 1 1
      hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml
  4. 6 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  5. 22 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/webhdfs.xml
  6. 3 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  7. 1 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java
  8. 9 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
  9. 2 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  10. 18 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java
  11. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java
  12. 8 0
      hadoop-mapreduce-project/CHANGES.txt
  13. 2 2
      hadoop-mapreduce-project/conf/container-executor.cfg
  14. 8 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
  15. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
  16. 3 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr
  17. 7 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java
  18. 7 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/MapAttemptFinishedEvent.java
  19. 7 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/ReduceAttemptFinishedEvent.java
  20. 6 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptUnsuccessfulCompletionEvent.java
  21. 4 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java
  22. 3 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEvents.java
  23. 6 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java
  24. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java
  25. 2 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
  26. 29 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  27. 3 2
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java
  28. 15 28
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java
  29. 11 8
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java
  30. 16 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
  31. 11 4
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java
  32. 41 37
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java
  33. 96 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java
  34. 15 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java
  35. 297 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java
  36. 97 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java
  37. 19 40
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java
  38. 23 17
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
  39. 7 5
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
  40. 12 6
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
  41. 7 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java
  42. 39 30
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java
  43. 12 5
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java
  44. 18 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java
  45. 73 61
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java
  46. 14 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java
  47. 8 8
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java
  48. 9 8
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java
  49. 15 17
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java
  50. 9 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java
  51. 9 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c
  52. 3 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h
  53. 22 53
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
  54. 8 6
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
  55. 22 13
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
  56. 24 17
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
  57. 11 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
  58. 6 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
  59. 18 6
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java
  60. 9 2
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java
  61. 47 27
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java
  62. 3 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
  63. 8 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
  64. 2 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
  65. 6 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java
  66. 27 12
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java
  67. 6 3
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java
  68. 12 4
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/TestNonAggregatingLogHandler.java
  69. 25 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java
  70. 43 20
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
  71. 1 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
  72. 247 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
  73. 9 15
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm
  74. 3 3
      hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobInProgress.java
  75. 4 0
      hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestCombineOutputCollector.java
  76. 1 1
      hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEvents.java
  77. 2 2
      hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
  78. 1 1
      hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/MapAttempt20LineHistoryEventEmitter.java
  79. 1 1
      hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/ReduceAttempt20LineHistoryEventEmitter.java
  80. 1 1
      hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TaskAttempt20LineEventEmitter.java
  81. 1 1
      pom.xml

+ 6 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -126,6 +126,9 @@ Release 0.23.1 - Unreleased
     HADOOP-7424. Log an error if the topology script doesn't handle multiple args.
     (Uma Maheswara Rao G via eli)
 
+    HADOOP-7804. Enable hadoop config generator to set configurations to enable
+    short circuit read. (Arpit Gupta via jitendra)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -141,6 +144,9 @@ Release 0.23.1 - Unreleased
 
    HADOOP-7859. TestViewFsHdfs.testgetFileLinkStatus is failing an assert. (eli)
 
+   HADOOP-7864. Building mvn site with Maven < 3.0.2 causes OOM errors.
+   (Andrew Bayer via eli)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 56 - 2
hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh

@@ -67,6 +67,10 @@ usage: $0 <parameters>
                                                                      This value should be <= mapred.cluster.max.map.memory.mb
      --mapreduce-reduce-memory-mb=memory                             Virtual memory, of a single reduce slot for a job. Defaults to -1
                                                                      This value should be <= mapred.cluster.max.reduce.memory.mb
+     --dfs-datanode-dir-perm=700                                     Set the permission for the datanode data directories. Defaults to 700
+     --dfs-block-local-path-access-user=user                         User for which you want to enable shortcircuit read.
+     --dfs-client-read-shortcircuit=true/false                       Enable shortcircuit read for the client. Will default to true if the shortcircuit user is set.
+     --dfs-client-read-shortcircuit-skip-checksum=false/true         Disable checking of checksum when shortcircuit read is taking place. Defaults to false.
   "
   exit 1
 }
@@ -124,7 +128,7 @@ function addPropertyToXMLConf
   local finalVal=$5
 
   #create the property text, make sure the / are escaped
-  propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>"
+  propText="<property>\n<name>$property<\/name>\n<value>$propValue<\/value>\n"
   #if description is not empty add it
   if [ ! -z $desc ]
   then
@@ -146,6 +150,28 @@ function addPropertyToXMLConf
   sed -i "s|$endText|$propText$endText|" $file
 }
 
+##########################################
+# Function to setup up the short circuit read settings
+#########################################
+function setupShortCircuitRead
+{
+  local conf_file="${HADOOP_CONF_DIR}/hdfs-site.xml"
+  #if the shortcircuit user is not set then return
+  if [ -z $DFS_BLOCK_LOCAL_PATH_ACCESS_USER ]
+  then
+    return
+  fi
+  
+  #set the defaults if values not present
+  DFS_CLIENT_READ_SHORTCIRCUIT=${DFS_CLIENT_READ_SHORTCIRCUIT:-false}
+  DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM:-false}
+
+  #add the user to the conf file
+  addPropertyToXMLConf "$conf_file" "dfs.block.local-path-access.user" "$DFS_BLOCK_LOCAL_PATH_ACCESS_USER"
+  addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit" "$DFS_CLIENT_READ_SHORTCIRCUIT"
+  addPropertyToXMLConf "$conf_file" "dfs.client.read.shortcircuit.skip.checksum" "$DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM"
+}
+
 ##########################################
 # Function to setup up the proxy user settings
 #########################################
@@ -217,6 +243,10 @@ OPTS=$(getopt \
   -l 'mapreduce-jobtracker-maxreducememory-mb:' \
   -l 'mapreduce-map-memory-mb:' \
   -l 'mapreduce-reduce-memory-mb:' \
+  -l 'dfs-datanode-dir-perm:' \
+  -l 'dfs-block-local-path-access-user:' \
+  -l 'dfs-client-read-shortcircuit:' \
+  -l 'dfs-client-read-shortcircuit-skip-checksum:' \
   -o 'h' \
   -- "$@") 
 
@@ -376,6 +406,22 @@ while true ; do
       MAPREDUCE_REDUCE_MEMORY_MB=$2; shift 2
       AUTOMATED=1
       ;;
+    --dfs-datanode-dir-perm)
+      DFS_DATANODE_DIR_PERM=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --dfs-block-local-path-access-user)
+      DFS_BLOCK_LOCAL_PATH_ACCESS_USER=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --dfs-client-read-shortcircuit)
+      DFS_CLIENT_READ_SHORTCIRCUIT=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --dfs-client-read-shortcircuit-skip-checksum)
+      DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=$2; shift 2
+      AUTOMATED=1
+      ;;
     --)
       shift ; break
       ;;
@@ -421,6 +467,8 @@ DFS_SUPPORT_APPEND=${DFS_SUPPORT_APPEND:-false}
 KERBEROS_REALM=${KERBEROS_REALM:-KERBEROS.EXAMPLE.COM}
 SECURITY_TYPE=${SECURITY_TYPE:-simple}
 KINIT=${KINIT:-/usr/kerberos/bin/kinit}
+#deault the data dir perm to 700
+DFS_DATANODE_DIR_PERM=${DFS_DATANODE_DIR_PERM:-700}
 if [ "${SECURITY_TYPE}" = "kerberos" ]; then
   TASK_CONTROLLER="org.apache.hadoop.mapred.LinuxTaskController"
   HADOOP_DN_ADDR="0.0.0.0:1019"
@@ -561,7 +609,10 @@ if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
 
   #setup up the proxy users
   setupProxyUsers
-  
+ 
+  #setup short circuit read
+  setupShortCircuitRead
+
   #set the owner of the hadoop dir to root
   chown root ${HADOOP_PREFIX}
   chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
@@ -611,6 +662,9 @@ else
   #setup up the proxy users
   setupProxyUsers
   
+  #setup short circuit read
+  setupShortCircuitRead
+
   chown root:${HADOOP_GROUP} ${HADOOP_CONF_DIR}/hadoop-env.sh
   chmod 755 ${HADOOP_CONF_DIR}/hadoop-env.sh
   #set taskcontroller

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml

@@ -202,7 +202,7 @@
 
   <property>
     <name>dfs.datanode.data.dir.perm</name>
-    <value>700</value>
+    <value>${DFS_DATANODE_DIR_PERM}</value>
     <description>The permissions that should be there on dfs.data.dir
       directories. The datanode will not come up if the permissions are
       different on existing dfs.data.dir directories. If the directories

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -116,6 +116,9 @@ Trunk (unreleased changes)
     HDFS-2532. TestDfsOverAvroRpc timing out in trunk (Uma Maheswara Rao G
                via todd)
 
+    HDFS-2606. webhdfs client filesystem impl must set the content-type 
+    header for create/append. (tucu)
+
 Release 0.23.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -158,6 +161,9 @@ Release 0.23.1 - UNRELEASED
 
     HDFS-2587. Add apt doc for WebHDFS REST API.  (szetszwo)
 
+    HDFS-2604. Add a log message to show if WebHDFS is enabled and a
+    configuration section in the forrest doc.  (szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-2130. Switch default checksum to CRC32C. (todd)

+ 22 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/webhdfs.xml

@@ -138,6 +138,28 @@
   http://&lt;HOST&gt;:&lt;HTTP_PORT&gt;/webhdfs/v1/&lt;PATH&gt;?op=...
 </source>
       </section>
+<!-- ***************************************************************************** -->
+      <section>
+        <title>HDFS Configuration Options</title>
+<p>
+  Below are the HDFS configuration options for WebHDFS.
+</p>
+<table>
+<tr><th>Property Name</th><th>Description</th></tr>
+<tr><td><code>dfs.webhdfs.enabled</code></td>
+<td>Enable/disable WebHDFS in Namenodes and Datanodes
+</td></tr>
+<tr><td><code>dfs.web.authentication.kerberos.principal</code></td>
+<td>The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint.
+    The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos
+    HTTP SPENGO specification.
+</td></tr>
+<tr><td><code>dfs.web.authentication.kerberos.keytab</code></td>
+<td>The Kerberos keytab file with the credentials for the
+    HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint.
+</td></tr>
+</table>
+      </section>
     </section>
 <!-- ***************************************************************************** -->
     <section id="Authentication">

+ 3 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -35,9 +35,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DNS_NAMESERVER_K
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HANDLER_COUNT_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PLUGINS_KEY;
@@ -50,8 +50,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_USER_NAME_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_FEDERATION_NAMESERVICES;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTPS_ENABLE_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEBHDFS_ENABLED_DEFAULT;
 
 import java.io.BufferedOutputStream;
 import java.io.ByteArrayInputStream;
@@ -97,6 +95,7 @@ import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
 import org.apache.hadoop.hdfs.HDFSPolicyProvider;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@@ -135,7 +134,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
-import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.server.protocolR23Compatible.InterDatanodeProtocolServerSideTranslatorR23;
 import org.apache.hadoop.hdfs.server.protocolR23Compatible.InterDatanodeProtocolTranslatorR23;
 import org.apache.hadoop.hdfs.server.protocolR23Compatible.InterDatanodeWireProtocol;
@@ -512,7 +510,7 @@ public class DataNode extends Configured
     this.infoServer.addServlet(null, "/blockScannerReport", 
                                DataBlockScanner.Servlet.class);
 
-    if (conf.getBoolean(DFS_WEBHDFS_ENABLED_KEY, DFS_WEBHDFS_ENABLED_DEFAULT)) {
+    if (WebHdfsFileSystem.isEnabled(conf, LOG)) {
       infoServer.addJerseyResourcePackage(DatanodeWebHdfsMethods.class
           .getPackage().getName() + ";" + Param.class.getPackage().getName(),
           WebHdfsFileSystem.PATH_PREFIX + "/*");

+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java

@@ -108,8 +108,7 @@ public class NameNodeHttpServer {
               infoPort == 0, conf, 
               new AccessControlList(conf.get(DFSConfigKeys.DFS_ADMIN, " "))) {
             {
-              if (conf.getBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY,
-                  DFSConfigKeys.DFS_WEBHDFS_ENABLED_DEFAULT)) {
+              if (WebHdfsFileSystem.isEnabled(conf, LOG)) {
                 //add SPNEGO authentication filter for webhdfs
                 final String name = "SPNEGO";
                 final String classname =  AuthFilter.class.getName();

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -131,6 +131,14 @@ public class WebHdfsFileSystem extends FileSystem
     DT_RENEWER.addRenewAction(webhdfs);
   }
 
+  /** Is WebHDFS enabled in conf? */
+  public static boolean isEnabled(final Configuration conf, final Log log) {
+    final boolean b = conf.getBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY,
+        DFSConfigKeys.DFS_WEBHDFS_ENABLED_DEFAULT);
+    log.info(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY + " = " + b);
+    return b;
+  }
+
   private final UserGroupInformation ugi;
   private InetSocketAddress nnAddr;
   private Token<?> delegationToken;
@@ -349,6 +357,7 @@ public class WebHdfsFileSystem extends FileSystem
       conn.setRequestMethod(op.getType().toString());
       if (op.getDoOutput()) {
         conn = twoStepWrite(conn, op);
+        conn.setRequestProperty("Content-Type", "application/octet-stream");
       }
       conn.setDoOutput(op.getDoOutput());
       conn.connect();

+ 2 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -122,17 +122,13 @@ creations/deletions), or "all".</description>
 <property>
 <property>
   <name>dfs.datanode.https.address</name>
   <name>dfs.datanode.https.address</name>
   <value>0.0.0.0:50475</value>
   <value>0.0.0.0:50475</value>
-  <description>The datanode secure http server address and port.
-    If the port is 0 then the server will start on a free port.
-  </description>
+  <description>The datanode secure http server address and port.</description>
 </property>
 </property>
 
 
 <property>
 <property>
   <name>dfs.namenode.https-address</name>
   <name>dfs.namenode.https-address</name>
   <value>0.0.0.0:50470</value>
   <value>0.0.0.0:50470</value>
-  <description>The namenode secure http server address and port.
-    If the port is 0 then the server will start on a free port.
-  </description>
+  <description>The namenode secure http server address and port.</description>
 </property>
 </property>
 
 
  <property>
  <property>

+ 18 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java

@@ -55,6 +55,7 @@ import org.apache.hadoop.hdfs.web.WebHdfsTestUtil;
 import org.apache.hadoop.hdfs.web.resources.DoAsParam;
 import org.apache.hadoop.hdfs.web.resources.DoAsParam;
 import org.apache.hadoop.hdfs.web.resources.ExceptionHandler;
 import org.apache.hadoop.hdfs.web.resources.ExceptionHandler;
 import org.apache.hadoop.hdfs.web.resources.GetOpParam;
 import org.apache.hadoop.hdfs.web.resources.GetOpParam;
+import org.apache.hadoop.hdfs.web.resources.PostOpParam;
 import org.apache.hadoop.hdfs.web.resources.PutOpParam;
 import org.apache.hadoop.hdfs.web.resources.PutOpParam;
 import org.apache.hadoop.security.TestDoAsEffectiveUser;
 import org.apache.hadoop.security.TestDoAsEffectiveUser;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -198,9 +199,9 @@ public class TestDelegationTokenForProxyUser {
       Assert.assertEquals("/user/" + PROXY_USER, responsePath);
       Assert.assertEquals("/user/" + PROXY_USER, responsePath);
     }
     }
 
 
+    final Path f = new Path("/testWebHdfsDoAs/a.txt");
     {
     {
       //test create file with doAs
       //test create file with doAs
-      final Path f = new Path("/testWebHdfsDoAs/a.txt");
       final PutOpParam.Op op = PutOpParam.Op.CREATE;
       final PutOpParam.Op op = PutOpParam.Op.CREATE;
       final URL url = WebHdfsTestUtil.toUrl(webhdfs, op,  f, new DoAsParam(PROXY_USER));
       final URL url = WebHdfsTestUtil.toUrl(webhdfs, op,  f, new DoAsParam(PROXY_USER));
       HttpURLConnection conn = (HttpURLConnection) url.openConnection();
       HttpURLConnection conn = (HttpURLConnection) url.openConnection();
@@ -213,5 +214,21 @@ public class TestDelegationTokenForProxyUser {
       WebHdfsTestUtil.LOG.info("status.getOwner()=" + status.getOwner());
       WebHdfsTestUtil.LOG.info("status.getOwner()=" + status.getOwner());
       Assert.assertEquals(PROXY_USER, status.getOwner());
       Assert.assertEquals(PROXY_USER, status.getOwner());
     }
     }
+
+    {
+      //test append file with doAs
+      final PostOpParam.Op op = PostOpParam.Op.APPEND;
+      final URL url = WebHdfsTestUtil.toUrl(webhdfs, op,  f, new DoAsParam(PROXY_USER));
+      HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+      conn = WebHdfsTestUtil.twoStepWrite(conn, op);
+      final FSDataOutputStream out = WebHdfsTestUtil.write(webhdfs, op, conn, 4096);
+      out.write("\nHello again!".getBytes());
+      out.close();
+  
+      final FileStatus status = webhdfs.getFileStatus(f);
+      WebHdfsTestUtil.LOG.info("status.getOwner()=" + status.getOwner());
+      WebHdfsTestUtil.LOG.info("status.getLen()  =" + status.getLen());
+      Assert.assertEquals(PROXY_USER, status.getOwner());
+    }
   }
   }
 }
 }

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java

@@ -33,6 +33,7 @@ public class TestJsonUtil {
     return new FileStatus(f.getLen(), f.isDir(), f.getReplication(),
     return new FileStatus(f.getLen(), f.isDir(), f.getReplication(),
         f.getBlockSize(), f.getModificationTime(), f.getAccessTime(),
         f.getBlockSize(), f.getModificationTime(), f.getAccessTime(),
         f.getPermission(), f.getOwner(), f.getGroup(),
         f.getPermission(), f.getOwner(), f.getGroup(),
+        f.isSymlink() ? new Path(f.getSymlink()) : null,
         new Path(f.getFullName(parent)));
         new Path(f.getFullName(parent)));
   }
   }
 
 

+ 8 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -71,6 +71,8 @@ Release 0.23.1 - Unreleased
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES
 
 
   NEW FEATURES                                                                    
   NEW FEATURES                                                                    
+   
+   MAPREDUCE-3121. NodeManager should handle disk-failures (Ravi Gummadi via mahadev)
 
 
   IMPROVEMENTS
   IMPROVEMENTS
 
 
@@ -122,6 +124,9 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3045. Fixed UI filters to not filter on hidden title-numeric
     MAPREDUCE-3045. Fixed UI filters to not filter on hidden title-numeric
     sort fields. (Jonathan Eagles via sseth)
     sort fields. (Jonathan Eagles via sseth)
 
 
+    MAPREDUCE-3448. TestCombineOutputCollector javac unchecked warning on mocked
+    generics (Jonathan Eagles via mahadev)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
   BUG FIXES
   BUG FIXES
@@ -192,6 +197,9 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3433. Finding counters by legacy group name returns empty
     MAPREDUCE-3433. Finding counters by legacy group name returns empty
     counters. (tomwhite)
     counters. (tomwhite)
 
 
+    MAPREDUCE-3450. NM port info no longer available in JobHistory.
+    (Siddharth Seth via mahadev)
+
 Release 0.23.0 - 2011-11-01 
 Release 0.23.0 - 2011-11-01 
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 2 - 2
hadoop-mapreduce-project/conf/container-executor.cfg

@@ -1,3 +1,3 @@
-yarn.nodemanager.local-dirs=#configured value of yarn.nodemanager.local-dirs. It can be a list of comma separated paths.
-yarn.nodemanager.log-dirs=#configured value of yarn.nodemanager.log-dirs.
 yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group
 yarn.nodemanager.linux-container-executor.group=#configured value of yarn.nodemanager.linux-container-executor.group
+banned.users=#comma separated list of users who can not run applications
+min.user.id=1000#Prevent other super-users

+ 8 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java

@@ -922,8 +922,11 @@ public abstract class TaskAttemptImpl implements
             TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId()
             TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId()
                 .getTaskType()), attemptState.toString(),
                 .getTaskType()), attemptState.toString(),
             taskAttempt.finishTime,
             taskAttempt.finishTime,
-            taskAttempt.containerMgrAddress == null ? "UNKNOWN"
-                : taskAttempt.containerMgrAddress, StringUtils.join(
+            taskAttempt.containerNodeId == null ? "UNKNOWN"
+                : taskAttempt.containerNodeId.getHost(),
+            taskAttempt.containerNodeId == null ? -1 
+                : taskAttempt.containerNodeId.getPort(),    
+            StringUtils.join(
                 LINE_SEPARATOR, taskAttempt.getDiagnostics()), taskAttempt
                 LINE_SEPARATOR, taskAttempt.getDiagnostics()), taskAttempt
                 .getProgressSplitBlock().burst());
                 .getProgressSplitBlock().burst());
     return tauce;
     return tauce;
@@ -1273,6 +1276,7 @@ public abstract class TaskAttemptImpl implements
          finishTime,
          finishTime,
          this.containerNodeId == null ? "UNKNOWN"
          this.containerNodeId == null ? "UNKNOWN"
              : this.containerNodeId.getHost(),
              : this.containerNodeId.getHost(),
+         this.containerNodeId == null ? -1 : this.containerNodeId.getPort(),
          this.nodeRackName == null ? "UNKNOWN" : this.nodeRackName,
          this.nodeRackName == null ? "UNKNOWN" : this.nodeRackName,
          this.reportedStatus.stateString,
          this.reportedStatus.stateString,
          TypeConverter.fromYarn(getCounters()),
          TypeConverter.fromYarn(getCounters()),
@@ -1288,7 +1292,8 @@ public abstract class TaskAttemptImpl implements
          this.reportedStatus.sortFinishTime,
          this.reportedStatus.sortFinishTime,
          finishTime,
          finishTime,
          this.containerNodeId == null ? "UNKNOWN"
          this.containerNodeId == null ? "UNKNOWN"
-                                         : this.containerNodeId.getHost(),
+             : this.containerNodeId.getHost(),
+         this.containerNodeId == null ? -1 : this.containerNodeId.getPort(),
          this.nodeRackName == null ? "UNKNOWN" : this.nodeRackName,
          this.nodeRackName == null ? "UNKNOWN" : this.nodeRackName,
          this.reportedStatus.stateString,
          this.reportedStatus.stateString,
          TypeConverter.fromYarn(getCounters()),
          TypeConverter.fromYarn(getCounters()),

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java

@@ -113,9 +113,10 @@ class LocalDistributedCacheManager {
     
     
     Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
     Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
     ExecutorService exec = Executors.newCachedThreadPool();
     ExecutorService exec = Executors.newCachedThreadPool();
+    Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
     for (LocalResource resource : localResources.values()) {
     for (LocalResource resource : localResources.values()) {
       Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
       Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
-          localDirAllocator, resource, new Random());
+          destPath, resource, new Random());
       Future<Path> future = exec.submit(download);
       Future<Path> future = exec.submit(download);
       resourcesToPaths.put(resource, future);
       resourcesToPaths.put(resource, future);
     }
     }

+ 3 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr

@@ -136,6 +136,7 @@
           {"name": "mapFinishTime", "type": "long"},
           {"name": "mapFinishTime", "type": "long"},
           {"name": "finishTime", "type": "long"},
           {"name": "finishTime", "type": "long"},
           {"name": "hostname", "type": "string"},
           {"name": "hostname", "type": "string"},
+          {"name": "port", "type": "int"},
           {"name": "rackname", "type": "string"},
           {"name": "rackname", "type": "string"},
           {"name": "state", "type": "string"},
           {"name": "state", "type": "string"},
           {"name": "counters", "type": "JhCounters"},
           {"name": "counters", "type": "JhCounters"},
@@ -156,6 +157,7 @@
           {"name": "sortFinishTime", "type": "long"},
           {"name": "sortFinishTime", "type": "long"},
           {"name": "finishTime", "type": "long"},
           {"name": "finishTime", "type": "long"},
           {"name": "hostname", "type": "string"},
           {"name": "hostname", "type": "string"},
+          {"name": "port", "type": "int"},
           {"name": "rackname", "type": "string"},
           {"name": "rackname", "type": "string"},
           {"name": "state", "type": "string"},
           {"name": "state", "type": "string"},
           {"name": "counters", "type": "JhCounters"},
           {"name": "counters", "type": "JhCounters"},
@@ -199,6 +201,7 @@
           {"name": "attemptId", "type": "string"},
           {"name": "attemptId", "type": "string"},
           {"name": "finishTime", "type": "long"},
           {"name": "finishTime", "type": "long"},
           {"name": "hostname", "type": "string"},
           {"name": "hostname", "type": "string"},
+          {"name": "port", "type": "int"},
           {"name": "status", "type": "string"},
           {"name": "status", "type": "string"},
           {"name": "error", "type": "string"},
           {"name": "error", "type": "string"},
           {"name": "clockSplits", "type": { "type": "array", "items": "int"}},
           {"name": "clockSplits", "type": { "type": "array", "items": "int"}},

+ 7 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java

@@ -209,6 +209,7 @@ public class JobHistoryParser {
     attemptInfo.sortFinishTime = event.getSortFinishTime();
     attemptInfo.sortFinishTime = event.getSortFinishTime();
     attemptInfo.counters = event.getCounters();
     attemptInfo.counters = event.getCounters();
     attemptInfo.hostname = event.getHostname();
     attemptInfo.hostname = event.getHostname();
+    attemptInfo.port = event.getPort();
     attemptInfo.rackname = event.getRackName();
     attemptInfo.rackname = event.getRackName();
   }
   }
 
 
@@ -222,6 +223,7 @@ public class JobHistoryParser {
     attemptInfo.mapFinishTime = event.getMapFinishTime();
     attemptInfo.mapFinishTime = event.getMapFinishTime();
     attemptInfo.counters = event.getCounters();
     attemptInfo.counters = event.getCounters();
     attemptInfo.hostname = event.getHostname();
     attemptInfo.hostname = event.getHostname();
+    attemptInfo.port = event.getPort();
     attemptInfo.rackname = event.getRackname();
     attemptInfo.rackname = event.getRackname();
   }
   }
 
 
@@ -234,6 +236,7 @@ public class JobHistoryParser {
     attemptInfo.error = event.getError();
     attemptInfo.error = event.getError();
     attemptInfo.status = event.getTaskStatus();
     attemptInfo.status = event.getTaskStatus();
     attemptInfo.hostname = event.getHostname();
     attemptInfo.hostname = event.getHostname();
+    attemptInfo.port = event.getPort();
     attemptInfo.shuffleFinishTime = event.getFinishTime();
     attemptInfo.shuffleFinishTime = event.getFinishTime();
     attemptInfo.sortFinishTime = event.getFinishTime();
     attemptInfo.sortFinishTime = event.getFinishTime();
     attemptInfo.mapFinishTime = event.getFinishTime();
     attemptInfo.mapFinishTime = event.getFinishTime();
@@ -542,6 +545,7 @@ public class JobHistoryParser {
     int httpPort;
     int httpPort;
     int shufflePort;
     int shufflePort;
     String hostname;
     String hostname;
+    int port;
     String rackname;
     String rackname;
     ContainerId containerId;
     ContainerId containerId;
 
 
@@ -552,6 +556,7 @@ public class JobHistoryParser {
       startTime = finishTime = shuffleFinishTime = sortFinishTime = 
       startTime = finishTime = shuffleFinishTime = sortFinishTime = 
         mapFinishTime = -1;
         mapFinishTime = -1;
       error =  state =  trackerName = hostname = rackname = "";
       error =  state =  trackerName = hostname = rackname = "";
+      port = -1;
       httpPort = -1;
       httpPort = -1;
       shufflePort = -1;
       shufflePort = -1;
     }
     }
@@ -599,6 +604,8 @@ public class JobHistoryParser {
     public String getTrackerName() { return trackerName; }
     public String getTrackerName() { return trackerName; }
     /** @return the host name */
     /** @return the host name */
     public String getHostname() { return hostname; }
     public String getHostname() { return hostname; }
+    /** @return the port */
+    public int getPort() { return port; }
     /** @return the rack name */
     /** @return the rack name */
     public String getRackname() { return rackname; }
     public String getRackname() { return rackname; }
     /** @return the counters for the attempt */
     /** @return the counters for the attempt */

+ 7 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/MapAttemptFinishedEvent.java

@@ -44,6 +44,7 @@ public class MapAttemptFinishedEvent  implements HistoryEvent {
    * @param mapFinishTime Finish time of the map phase
    * @param mapFinishTime Finish time of the map phase
    * @param finishTime Finish time of the attempt
    * @param finishTime Finish time of the attempt
    * @param hostname Name of the host where the map executed
    * @param hostname Name of the host where the map executed
+   * @param port RPC port for the tracker host.
    * @param rackName Name of the rack where the map executed
    * @param rackName Name of the rack where the map executed
    * @param state State string for the attempt
    * @param state State string for the attempt
    * @param counters Counters for the attempt
    * @param counters Counters for the attempt
@@ -57,9 +58,8 @@ public class MapAttemptFinishedEvent  implements HistoryEvent {
    */
    */
   public MapAttemptFinishedEvent
   public MapAttemptFinishedEvent
       (TaskAttemptID id, TaskType taskType, String taskStatus, 
       (TaskAttemptID id, TaskType taskType, String taskStatus, 
-       long mapFinishTime, long finishTime, String hostname, String rackName,
-       String state, Counters counters,
-       int[][] allSplits) {
+       long mapFinishTime, long finishTime, String hostname, int port, 
+       String rackName, String state, Counters counters, int[][] allSplits) {
     datum.taskid = new Utf8(id.getTaskID().toString());
     datum.taskid = new Utf8(id.getTaskID().toString());
     datum.attemptId = new Utf8(id.toString());
     datum.attemptId = new Utf8(id.toString());
     datum.taskType = new Utf8(taskType.name());
     datum.taskType = new Utf8(taskType.name());
@@ -67,6 +67,7 @@ public class MapAttemptFinishedEvent  implements HistoryEvent {
     datum.mapFinishTime = mapFinishTime;
     datum.mapFinishTime = mapFinishTime;
     datum.finishTime = finishTime;
     datum.finishTime = finishTime;
     datum.hostname = new Utf8(hostname);
     datum.hostname = new Utf8(hostname);
+    datum.port = port;
     datum.rackname = new Utf8(rackName);
     datum.rackname = new Utf8(rackName);
     datum.state = new Utf8(state);
     datum.state = new Utf8(state);
     datum.counters = EventWriter.toAvro(counters);
     datum.counters = EventWriter.toAvro(counters);
@@ -106,7 +107,7 @@ public class MapAttemptFinishedEvent  implements HistoryEvent {
       (TaskAttemptID id, TaskType taskType, String taskStatus, 
       (TaskAttemptID id, TaskType taskType, String taskStatus, 
        long mapFinishTime, long finishTime, String hostname,
        long mapFinishTime, long finishTime, String hostname,
        String state, Counters counters) {
        String state, Counters counters) {
-    this(id, taskType, taskStatus, mapFinishTime, finishTime, hostname, "",
+    this(id, taskType, taskStatus, mapFinishTime, finishTime, hostname, -1, "",
         state, counters, null);
         state, counters, null);
   }
   }
   
   
@@ -136,6 +137,8 @@ public class MapAttemptFinishedEvent  implements HistoryEvent {
   public long getFinishTime() { return datum.finishTime; }
   public long getFinishTime() { return datum.finishTime; }
   /** Get the host name */
   /** Get the host name */
   public String getHostname() { return datum.hostname.toString(); }
   public String getHostname() { return datum.hostname.toString(); }
+  /** Get the tracker rpc port */
+  public int getPort() { return datum.port; }
   /** Get the rack name */
   /** Get the rack name */
   public String getRackname() { return datum.rackname.toString(); }
   public String getRackname() { return datum.rackname.toString(); }
   /** Get the state string */
   /** Get the state string */

+ 7 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/ReduceAttemptFinishedEvent.java

@@ -46,6 +46,7 @@ public class ReduceAttemptFinishedEvent  implements HistoryEvent {
    * @param sortFinishTime Finish time of the sort phase
    * @param sortFinishTime Finish time of the sort phase
    * @param finishTime Finish time of the attempt
    * @param finishTime Finish time of the attempt
    * @param hostname Name of the host where the attempt executed
    * @param hostname Name of the host where the attempt executed
+   * @param port RPC port for the tracker host.
    * @param rackName Name of the rack where the attempt executed
    * @param rackName Name of the rack where the attempt executed
    * @param state State of the attempt
    * @param state State of the attempt
    * @param counters Counters for the attempt
    * @param counters Counters for the attempt
@@ -57,8 +58,8 @@ public class ReduceAttemptFinishedEvent  implements HistoryEvent {
   public ReduceAttemptFinishedEvent
   public ReduceAttemptFinishedEvent
     (TaskAttemptID id, TaskType taskType, String taskStatus, 
     (TaskAttemptID id, TaskType taskType, String taskStatus, 
      long shuffleFinishTime, long sortFinishTime, long finishTime,
      long shuffleFinishTime, long sortFinishTime, long finishTime,
-     String hostname, String rackName, String state, Counters counters,
-     int[][] allSplits) {
+     String hostname, int port,  String rackName, String state, 
+     Counters counters, int[][] allSplits) {
     datum.taskid = new Utf8(id.getTaskID().toString());
     datum.taskid = new Utf8(id.getTaskID().toString());
     datum.attemptId = new Utf8(id.toString());
     datum.attemptId = new Utf8(id.toString());
     datum.taskType = new Utf8(taskType.name());
     datum.taskType = new Utf8(taskType.name());
@@ -67,6 +68,7 @@ public class ReduceAttemptFinishedEvent  implements HistoryEvent {
     datum.sortFinishTime = sortFinishTime;
     datum.sortFinishTime = sortFinishTime;
     datum.finishTime = finishTime;
     datum.finishTime = finishTime;
     datum.hostname = new Utf8(hostname);
     datum.hostname = new Utf8(hostname);
+    datum.port = port;
     datum.rackname = new Utf8(rackName);
     datum.rackname = new Utf8(rackName);
     datum.state = new Utf8(state);
     datum.state = new Utf8(state);
     datum.counters = EventWriter.toAvro(counters);
     datum.counters = EventWriter.toAvro(counters);
@@ -108,7 +110,7 @@ public class ReduceAttemptFinishedEvent  implements HistoryEvent {
      String hostname, String state, Counters counters) {
      String hostname, String state, Counters counters) {
     this(id, taskType, taskStatus,
     this(id, taskType, taskStatus,
          shuffleFinishTime, sortFinishTime, finishTime,
          shuffleFinishTime, sortFinishTime, finishTime,
-         hostname, "", state, counters, null);
+         hostname, -1, "", state, counters, null);
   }
   }
 
 
   ReduceAttemptFinishedEvent() {}
   ReduceAttemptFinishedEvent() {}
@@ -138,6 +140,8 @@ public class ReduceAttemptFinishedEvent  implements HistoryEvent {
   public long getFinishTime() { return datum.finishTime; }
   public long getFinishTime() { return datum.finishTime; }
   /** Get the name of the host where the attempt ran */
   /** Get the name of the host where the attempt ran */
   public String getHostname() { return datum.hostname.toString(); }
   public String getHostname() { return datum.hostname.toString(); }
+  /** Get the tracker rpc port */
+  public int getPort() { return datum.port; }
   /** Get the rack name of the node where the attempt ran */
   /** Get the rack name of the node where the attempt ran */
   public String getRackName() { return datum.rackname.toString(); }
   public String getRackName() { return datum.rackname.toString(); }
   /** Get the state string */
   /** Get the state string */

+ 6 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptUnsuccessfulCompletionEvent.java

@@ -46,6 +46,7 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent {
    * @param status Status of the attempt
    * @param status Status of the attempt
    * @param finishTime Finish time of the attempt
    * @param finishTime Finish time of the attempt
    * @param hostname Name of the host where the attempt executed
    * @param hostname Name of the host where the attempt executed
+   * @param port rpc port for the tracker
    * @param error Error string
    * @param error Error string
    * @param allSplits the "splits", or a pixelated graph of various
    * @param allSplits the "splits", or a pixelated graph of various
    *        measurable worker node state variables against progress.
    *        measurable worker node state variables against progress.
@@ -55,13 +56,14 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent {
   public TaskAttemptUnsuccessfulCompletionEvent
   public TaskAttemptUnsuccessfulCompletionEvent
        (TaskAttemptID id, TaskType taskType,
        (TaskAttemptID id, TaskType taskType,
         String status, long finishTime, 
         String status, long finishTime, 
-        String hostname, String error,
+        String hostname, int port, String error,
         int[][] allSplits) {
         int[][] allSplits) {
     datum.taskid = new Utf8(id.getTaskID().toString());
     datum.taskid = new Utf8(id.getTaskID().toString());
     datum.taskType = new Utf8(taskType.name());
     datum.taskType = new Utf8(taskType.name());
     datum.attemptId = new Utf8(id.toString());
     datum.attemptId = new Utf8(id.toString());
     datum.finishTime = finishTime;
     datum.finishTime = finishTime;
     datum.hostname = new Utf8(hostname);
     datum.hostname = new Utf8(hostname);
+    datum.port = port;
     datum.error = new Utf8(error);
     datum.error = new Utf8(error);
     datum.status = new Utf8(status);
     datum.status = new Utf8(status);
 
 
@@ -97,7 +99,7 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent {
        (TaskAttemptID id, TaskType taskType,
        (TaskAttemptID id, TaskType taskType,
         String status, long finishTime, 
         String status, long finishTime, 
         String hostname, String error) {
         String hostname, String error) {
-    this(id, taskType, status, finishTime, hostname, error, null);
+    this(id, taskType, status, finishTime, hostname, -1, error, null);
   }
   }
 
 
   TaskAttemptUnsuccessfulCompletionEvent() {}
   TaskAttemptUnsuccessfulCompletionEvent() {}
@@ -121,6 +123,8 @@ public class TaskAttemptUnsuccessfulCompletionEvent implements HistoryEvent {
   public long getFinishTime() { return datum.finishTime; }
   public long getFinishTime() { return datum.finishTime; }
   /** Get the name of the host where the attempt executed */
   /** Get the name of the host where the attempt executed */
   public String getHostname() { return datum.hostname.toString(); }
   public String getHostname() { return datum.hostname.toString(); }
+  /** Get the rpc port for the host where the attempt executed */
+  public int getPort() { return datum.port; }
   /** Get the error string */
   /** Get the error string */
   public String getError() { return datum.error.toString(); }
   public String getError() { return datum.error.toString(); }
   /** Get the task status */
   /** Get the task status */

+ 4 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedTaskAttempt.java

@@ -80,12 +80,11 @@ public class CompletedTaskAttempt implements TaskAttempt {
     report.setStateString(attemptInfo.getState());
     report.setStateString(attemptInfo.getState());
     report.setCounters(getCounters());
     report.setCounters(getCounters());
     report.setContainerId(attemptInfo.getContainerId());
     report.setContainerId(attemptInfo.getContainerId());
-    String []hostSplits = attemptInfo.getHostname().split(":");
-    if (hostSplits.length != 2) {
+    if (attemptInfo.getHostname() == null) {
       report.setNodeManagerHost("UNKNOWN");
       report.setNodeManagerHost("UNKNOWN");
     } else {
     } else {
-      report.setNodeManagerHost(hostSplits[0]);
-      report.setNodeManagerPort(Integer.parseInt(hostSplits[1]));
+      report.setNodeManagerHost(attemptInfo.getHostname());
+      report.setNodeManagerPort(attemptInfo.getPort());
     }
     }
     report.setNodeManagerHttpPort(attemptInfo.getHttpPort());
     report.setNodeManagerHttpPort(attemptInfo.getHttpPort());
   }
   }
@@ -97,7 +96,7 @@ public class CompletedTaskAttempt implements TaskAttempt {
 
 
   @Override
   @Override
   public String getAssignedContainerMgrAddress() {
   public String getAssignedContainerMgrAddress() {
-    return attemptInfo.getHostname();
+    return attemptInfo.getHostname() + ":" + attemptInfo.getPort();
   }
   }
 
 
   @Override
   @Override

+ 3 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEvents.java

@@ -165,6 +165,9 @@ public class TestJobHistoryEvents {
   //Verify the wrong ctor is not being used. Remove after mrv1 is removed.
   //Verify the wrong ctor is not being used. Remove after mrv1 is removed.
     ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1);
     ContainerId fakeCid = BuilderUtils.newContainerId(-1, -1, -1, -1);
     Assert.assertFalse(attempt.getAssignedContainerID().equals(fakeCid));
     Assert.assertFalse(attempt.getAssignedContainerID().equals(fakeCid));
+    //Verify complete containerManagerAddress
+    Assert.assertEquals(MRApp.NM_HOST + ":" + MRApp.NM_PORT,
+        attempt.getAssignedContainerMgrAddress());
   }
   }
 
 
   static class MRAppWithHistory extends MRApp {
   static class MRAppWithHistory extends MRApp {

+ 6 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TaskID;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
@@ -64,13 +65,14 @@ public class TestJobHistoryParsing {
   public static class MyResolver implements DNSToSwitchMapping {
   public static class MyResolver implements DNSToSwitchMapping {
     @Override
     @Override
     public List<String> resolve(List<String> names) {
     public List<String> resolve(List<String> names) {
-      return Arrays.asList(new String[]{"MyRackName"});
+      return Arrays.asList(new String[]{"/MyRackName"});
     }
     }
   }
   }
 
 
   @Test
   @Test
   public void testHistoryParsing() throws Exception {
   public void testHistoryParsing() throws Exception {
     Configuration conf = new Configuration();
     Configuration conf = new Configuration();
+    conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name"));
     long amStartTimeEst = System.currentTimeMillis();
     long amStartTimeEst = System.currentTimeMillis();
     conf.setClass(
     conf.setClass(
         CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
         CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
@@ -165,10 +167,12 @@ public class TestJobHistoryParsing {
         Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
         Assert.assertNotNull("TaskAttemptInfo not found", taskAttemptInfo);
         Assert.assertEquals("Incorrect shuffle port for task attempt",
         Assert.assertEquals("Incorrect shuffle port for task attempt",
             taskAttempt.getShufflePort(), taskAttemptInfo.getShufflePort());
             taskAttempt.getShufflePort(), taskAttemptInfo.getShufflePort());
+        Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname());
+        Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort());
 
 
         // Verify rack-name
         // Verify rack-name
         Assert.assertEquals("rack-name is incorrect", taskAttemptInfo
         Assert.assertEquals("rack-name is incorrect", taskAttemptInfo
-            .getRackname(), "MyRackName");
+            .getRackname(), "/MyRackName");
       }
       }
     }
     }
 
 

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java

@@ -56,7 +56,7 @@ public class MiniMRYarnCluster extends MiniYARNCluster {
   }
   }
 
 
   public MiniMRYarnCluster(String testName, int noOfNMs) {
   public MiniMRYarnCluster(String testName, int noOfNMs) {
-    super(testName, noOfNMs);
+    super(testName, noOfNMs, 4, 4);
     //TODO: add the history server
     //TODO: add the history server
     historyServerWrapper = new JobHistoryServerWrapper();
     historyServerWrapper = new JobHistoryServerWrapper();
     addService(historyServerWrapper);
     addService(historyServerWrapper);

+ 2 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java

@@ -43,7 +43,8 @@ public class TestDistributedShell {
   public static void setup() throws InterruptedException, IOException {
   public static void setup() throws InterruptedException, IOException {
     LOG.info("Starting up YARN cluster");
     LOG.info("Starting up YARN cluster");
     if (yarnCluster == null) {
     if (yarnCluster == null) {
-      yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName());
+      yarnCluster = new MiniYARNCluster(TestDistributedShell.class.getName(),
+          1, 1, 1);
       yarnCluster.init(conf);
       yarnCluster.init(conf);
       yarnCluster.start();
       yarnCluster.start();
     }
     }

+ 29 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -351,13 +351,39 @@ public class YarnConfiguration extends Configuration {
   /** Class that calculates containers current resource utilization.*/
   /** Class that calculates containers current resource utilization.*/
   public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
   public static final String NM_CONTAINER_MON_RESOURCE_CALCULATOR =
     NM_PREFIX + "container-monitor.resource-calculator.class";
     NM_PREFIX + "container-monitor.resource-calculator.class";
-  
+
+  /**
+   * Enable/Disable disks' health checker. Default is true.
+   * An expert level configuration property.
+   */
+  public static final String NM_DISK_HEALTH_CHECK_ENABLE =
+    NM_PREFIX + "disk-health-checker.enable";
+  /** Frequency of running disks' health checker.*/
+  public static final String NM_DISK_HEALTH_CHECK_INTERVAL_MS =
+    NM_PREFIX + "disk-health-checker.interval-ms";
+  /** By default, disks' health is checked every 2 minutes. */
+  public static final long DEFAULT_NM_DISK_HEALTH_CHECK_INTERVAL_MS =
+    2 * 60 * 1000;
+
+  /**
+   * The minimum fraction of number of disks to be healthy for the nodemanager
+   * to launch new containers. This applies to nm-local-dirs and nm-log-dirs.
+   */
+  public static final String NM_MIN_HEALTHY_DISKS_FRACTION =
+    NM_PREFIX + "disk-health-checker.min-healthy-disks";
+  /**
+   * By default, at least 5% of disks are to be healthy to say that the node
+   * is healthy in terms of disks.
+   */
+  public static final float DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION
+    = 0.25F;
+
   /** Frequency of running node health script.*/
   /** Frequency of running node health script.*/
   public static final String NM_HEALTH_CHECK_INTERVAL_MS = 
   public static final String NM_HEALTH_CHECK_INTERVAL_MS = 
     NM_PREFIX + "health-checker.interval-ms";
     NM_PREFIX + "health-checker.interval-ms";
   public static final long DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS = 10 * 60 * 1000;
   public static final long DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS = 10 * 60 * 1000;
-  
-  /** Script time out period.*/
+
+  /** Health check script time out period.*/  
   public static final String NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS = 
   public static final String NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS = 
     NM_PREFIX + "health-checker.script.timeout-ms";
     NM_PREFIX + "health-checker.script.timeout-ms";
   public static final long DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS = 
   public static final long DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS = 

+ 3 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java

@@ -31,6 +31,7 @@ import java.io.Writer;
 import java.security.PrivilegedExceptionAction;
 import java.security.PrivilegedExceptionAction;
 import java.util.EnumSet;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Map.Entry;
 
 
@@ -105,12 +106,12 @@ public class AggregatedLogFormat {
 
 
   public static class LogValue {
   public static class LogValue {
 
 
-    private final String[] rootLogDirs;
+    private final List<String> rootLogDirs;
     private final ContainerId containerId;
     private final ContainerId containerId;
     // TODO Maybe add a version string here. Instead of changing the version of
     // TODO Maybe add a version string here. Instead of changing the version of
     // the entire k-v format
     // the entire k-v format
 
 
-    public LogValue(String[] rootLogDirs, ContainerId containerId) {
+    public LogValue(List<String> rootLogDirs, ContainerId containerId) {
       this.rootLogDirs = rootLogDirs;
       this.rootLogDirs = rootLogDirs;
       this.containerId = containerId;
       this.containerId = containerId;
     }
     }

+ 15 - 28
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java

@@ -33,7 +33,6 @@ import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -56,7 +55,10 @@ public class FSDownload implements Callable<Path> {
   private final UserGroupInformation userUgi;
   private final UserGroupInformation userUgi;
   private Configuration conf;
   private Configuration conf;
   private LocalResource resource;
   private LocalResource resource;
-  private LocalDirAllocator dirs;
+  
+  /** The local FS dir path under which this resource is to be localized to */
+  private Path destDirPath;
+
   private static final FsPermission cachePerms = new FsPermission(
   private static final FsPermission cachePerms = new FsPermission(
       (short) 0755);
       (short) 0755);
   static final FsPermission PUBLIC_FILE_PERMS = new FsPermission((short) 0555);
   static final FsPermission PUBLIC_FILE_PERMS = new FsPermission((short) 0555);
@@ -65,10 +67,11 @@ public class FSDownload implements Callable<Path> {
   static final FsPermission PUBLIC_DIR_PERMS = new FsPermission((short) 0755);
   static final FsPermission PUBLIC_DIR_PERMS = new FsPermission((short) 0755);
   static final FsPermission PRIVATE_DIR_PERMS = new FsPermission((short) 0700);
   static final FsPermission PRIVATE_DIR_PERMS = new FsPermission((short) 0700);
 
 
+
   public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf,
   public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf,
-      LocalDirAllocator dirs, LocalResource resource, Random rand) {
+      Path destDirPath, LocalResource resource, Random rand) {
     this.conf = conf;
     this.conf = conf;
-    this.dirs = dirs;
+    this.destDirPath = destDirPath;
     this.files = files;
     this.files = files;
     this.userUgi = ugi;
     this.userUgi = ugi;
     this.resource = resource;
     this.resource = resource;
@@ -136,15 +139,13 @@ public class FSDownload implements Callable<Path> {
     }
     }
 
 
     Path tmp;
     Path tmp;
-    Path dst =
-        dirs.getLocalPathForWrite(".", getEstimatedSize(resource),
-            conf);
     do {
     do {
-      tmp = new Path(dst, String.valueOf(rand.nextLong()));
+      tmp = new Path(destDirPath, String.valueOf(rand.nextLong()));
     } while (files.util().exists(tmp));
     } while (files.util().exists(tmp));
-    dst = tmp;
-    files.mkdir(dst, cachePerms, false);
-    final Path dst_work = new Path(dst + "_tmp");
+    destDirPath = tmp;
+
+    files.mkdir(destDirPath, cachePerms, false);
+    final Path dst_work = new Path(destDirPath + "_tmp");
     files.mkdir(dst_work, cachePerms, false);
     files.mkdir(dst_work, cachePerms, false);
 
 
     Path dFinal = files.makeQualified(new Path(dst_work, sCopy.getName()));
     Path dFinal = files.makeQualified(new Path(dst_work, sCopy.getName()));
@@ -158,9 +159,9 @@ public class FSDownload implements Callable<Path> {
       });
       });
       unpack(new File(dTmp.toUri()), new File(dFinal.toUri()));
       unpack(new File(dTmp.toUri()), new File(dFinal.toUri()));
       changePermissions(dFinal.getFileSystem(conf), dFinal);
       changePermissions(dFinal.getFileSystem(conf), dFinal);
-      files.rename(dst_work, dst, Rename.OVERWRITE);
+      files.rename(dst_work, destDirPath, Rename.OVERWRITE);
     } catch (Exception e) {
     } catch (Exception e) {
-      try { files.delete(dst, true); } catch (IOException ignore) { }
+      try { files.delete(destDirPath, true); } catch (IOException ignore) { }
       throw e;
       throw e;
     } finally {
     } finally {
       try {
       try {
@@ -170,9 +171,8 @@ public class FSDownload implements Callable<Path> {
       rand = null;
       rand = null;
       conf = null;
       conf = null;
       resource = null;
       resource = null;
-      dirs = null;
     }
     }
-    return files.makeQualified(new Path(dst, sCopy.getName()));
+    return files.makeQualified(new Path(destDirPath, sCopy.getName()));
   }
   }
 
 
   /**
   /**
@@ -221,17 +221,4 @@ public class FSDownload implements Callable<Path> {
     }
     }
   }
   }
 
 
-  private static long getEstimatedSize(LocalResource rsrc) {
-    if (rsrc.getSize() < 0) {
-      return -1;
-    }
-    switch (rsrc.getType()) {
-      case ARCHIVE:
-        return 5 * rsrc.getSize();
-      case FILE:
-      default:
-        return rsrc.getSize();
-    }
-  }
-
 }
 }

+ 11 - 8
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java

@@ -146,13 +146,14 @@ public class TestFSDownload {
         vis = LocalResourceVisibility.APPLICATION;
         vis = LocalResourceVisibility.APPLICATION;
         break;       
         break;       
       }
       }
-      
-      LocalResource rsrc = createFile(files, new Path(basedir, "" + i),
-          sizes[i], rand, vis);
+      Path p = new Path(basedir, "" + i);
+      LocalResource rsrc = createFile(files, p, sizes[i], rand, vis);
       rsrcVis.put(rsrc, vis);
       rsrcVis.put(rsrc, vis);
+      Path destPath = dirs.getLocalPathForWrite(
+          basedir.toString(), sizes[i], conf);
       FSDownload fsd =
       FSDownload fsd =
           new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
           new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
-              dirs, rsrc, new Random(sharedSeed));
+              destPath, rsrc, new Random(sharedSeed));
       pending.put(rsrc, exec.submit(fsd));
       pending.put(rsrc, exec.submit(fsd));
     }
     }
 
 
@@ -249,13 +250,15 @@ public class TestFSDownload {
         vis = LocalResourceVisibility.APPLICATION;
         vis = LocalResourceVisibility.APPLICATION;
         break;       
         break;       
       }
       }
-      
-      LocalResource rsrc = createJar(files, new Path(basedir, "dir" + i
-          + ".jar"), vis);
+
+      Path p = new Path(basedir, "dir" + i + ".jar");
+      LocalResource rsrc = createJar(files, p, vis);
       rsrcVis.put(rsrc, vis);
       rsrcVis.put(rsrc, vis);
+      Path destPath = dirs.getLocalPathForWrite(
+          basedir.toString(), conf);
       FSDownload fsd =
       FSDownload fsd =
           new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
           new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
-              dirs, rsrc, new Random(sharedSeed));
+              destPath, rsrc, new Random(sharedSeed));
       pending.put(rsrc, exec.submit(fsd));
       pending.put(rsrc, exec.submit(fsd));
     }
     }
     
     

+ 16 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml

@@ -388,6 +388,22 @@
     <value></value>
     <value></value>
   </property>
   </property>
 
 
+  <property>
+    <description>Frequency of running disk health checker code.</description>
+    <name>yarn.nodemanager.disk-health-checker.interval-ms</name>
+    <value>120000</value>
+  </property>
+
+  <property>
+    <description>The minimum fraction of number of disks to be healthy for the
+    nodemanager to launch new containers. This correspond to both
+    yarn-nodemanager.local-dirs and yarn.nodemanager.log-dirs. i.e. If there
+    are less number of healthy local-dirs (or log-dirs) available, then
+    new containers will not be launched on this node.</description>
+    <name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
+    <value>0.25</value>
+  </property>
+
   <property>
   <property>
     <description>The path to the Linux container executor.</description>
     <description>The path to the Linux container executor.</description>
     <name>yarn.nodemanager.linux-container-executor.path</name>
     <name>yarn.nodemanager.linux-container-executor.path</name>

+ 11 - 4
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java

@@ -45,6 +45,7 @@ public abstract class ContainerExecutor implements Configurable {
     FsPermission.createImmutable((short) 0700);
     FsPermission.createImmutable((short) 0700);
 
 
   private Configuration conf;
   private Configuration conf;
+
   private ConcurrentMap<ContainerId, Path> pidFiles =
   private ConcurrentMap<ContainerId, Path> pidFiles =
       new ConcurrentHashMap<ContainerId, Path>();
       new ConcurrentHashMap<ContainerId, Path>();
 
 
@@ -68,7 +69,7 @@ public abstract class ContainerExecutor implements Configurable {
    * @throws IOException
    * @throws IOException
    */
    */
   public abstract void init() throws IOException;
   public abstract void init() throws IOException;
-  
+
   /**
   /**
    * Prepare the environment for containers in this application to execute.
    * Prepare the environment for containers in this application to execute.
    * For $x in local.dirs
    * For $x in local.dirs
@@ -82,12 +83,14 @@ public abstract class ContainerExecutor implements Configurable {
    * @param appId id of the application
    * @param appId id of the application
    * @param nmPrivateContainerTokens path to localized credentials, rsrc by NM
    * @param nmPrivateContainerTokens path to localized credentials, rsrc by NM
    * @param nmAddr RPC address to contact NM
    * @param nmAddr RPC address to contact NM
+   * @param localDirs nm-local-dirs
+   * @param logDirs nm-log-dirs
    * @throws IOException For most application init failures
    * @throws IOException For most application init failures
    * @throws InterruptedException If application init thread is halted by NM
    * @throws InterruptedException If application init thread is halted by NM
    */
    */
   public abstract void startLocalizer(Path nmPrivateContainerTokens,
   public abstract void startLocalizer(Path nmPrivateContainerTokens,
       InetSocketAddress nmAddr, String user, String appId, String locId,
       InetSocketAddress nmAddr, String user, String appId, String locId,
-      List<Path> localDirs)
+      List<String> localDirs, List<String> logDirs)
     throws IOException, InterruptedException;
     throws IOException, InterruptedException;
 
 
 
 
@@ -100,12 +103,15 @@ public abstract class ContainerExecutor implements Configurable {
    * @param user the user of the container
    * @param user the user of the container
    * @param appId the appId of the container
    * @param appId the appId of the container
    * @param containerWorkDir the work dir for the container
    * @param containerWorkDir the work dir for the container
+   * @param localDirs nm-local-dirs to be used for this container
+   * @param logDirs nm-log-dirs to be used for this container
    * @return the return status of the launch
    * @return the return status of the launch
    * @throws IOException
    * @throws IOException
    */
    */
   public abstract int launchContainer(Container container,
   public abstract int launchContainer(Container container,
       Path nmPrivateContainerScriptPath, Path nmPrivateTokensPath,
       Path nmPrivateContainerScriptPath, Path nmPrivateTokensPath,
-      String user, String appId, Path containerWorkDir) throws IOException;
+      String user, String appId, Path containerWorkDir, List<String> localDirs,
+      List<String> logDirs) throws IOException;
 
 
   public abstract boolean signalContainer(String user, String pid,
   public abstract boolean signalContainer(String user, String pid,
       Signal signal)
       Signal signal)
@@ -116,7 +122,8 @@ public abstract class ContainerExecutor implements Configurable {
 
 
   public enum ExitCode {
   public enum ExitCode {
     FORCE_KILLED(137),
     FORCE_KILLED(137),
-    TERMINATED(143);
+    TERMINATED(143),
+    DISKS_FAILED(-101);
     private final int code;
     private final int code;
 
 
     private ExitCode(int exitCode) {
     private ExitCode(int exitCode) {

+ 41 - 37
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java

@@ -26,6 +26,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.PrintStream;
 import java.net.InetSocketAddress;
 import java.net.InetSocketAddress;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Arrays;
 import java.util.EnumSet;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.List;
@@ -39,7 +40,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
@@ -77,16 +77,17 @@ public class DefaultContainerExecutor extends ContainerExecutor {
   @Override
   @Override
   public void startLocalizer(Path nmPrivateContainerTokensPath,
   public void startLocalizer(Path nmPrivateContainerTokensPath,
       InetSocketAddress nmAddr, String user, String appId, String locId,
       InetSocketAddress nmAddr, String user, String appId, String locId,
-      List<Path> localDirs) throws IOException, InterruptedException {
+      List<String> localDirs, List<String> logDirs)
+      throws IOException, InterruptedException {
 
 
     ContainerLocalizer localizer =
     ContainerLocalizer localizer =
-        new ContainerLocalizer(this.lfs, user, appId, locId,
-            localDirs, RecordFactoryProvider.getRecordFactory(getConf()));
+        new ContainerLocalizer(lfs, user, appId, locId, getPaths(localDirs),
+            RecordFactoryProvider.getRecordFactory(getConf()));
 
 
     createUserLocalDirs(localDirs, user);
     createUserLocalDirs(localDirs, user);
     createUserCacheDirs(localDirs, user);
     createUserCacheDirs(localDirs, user);
     createAppDirs(localDirs, user, appId);
     createAppDirs(localDirs, user, appId);
-    createAppLogDirs(appId);
+    createAppLogDirs(appId, logDirs);
 
 
     // TODO: Why pick first app dir. The same in LCE why not random?
     // TODO: Why pick first app dir. The same in LCE why not random?
     Path appStorageDir = getFirstApplicationDir(localDirs, user, appId);
     Path appStorageDir = getFirstApplicationDir(localDirs, user, appId);
@@ -104,8 +105,8 @@ public class DefaultContainerExecutor extends ContainerExecutor {
   @Override
   @Override
   public int launchContainer(Container container,
   public int launchContainer(Container container,
       Path nmPrivateContainerScriptPath, Path nmPrivateTokensPath,
       Path nmPrivateContainerScriptPath, Path nmPrivateTokensPath,
-      String userName, String appId, Path containerWorkDir)
-      throws IOException {
+      String userName, String appId, Path containerWorkDir,
+      List<String> localDirs, List<String> logDirs) throws IOException {
 
 
     ContainerId containerId = container.getContainerID();
     ContainerId containerId = container.getContainerID();
 
 
@@ -115,10 +116,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
         ConverterUtils.toString(
         ConverterUtils.toString(
             container.getContainerID().getApplicationAttemptId().
             container.getContainerID().getApplicationAttemptId().
                 getApplicationId());
                 getApplicationId());
-    String[] sLocalDirs = getConf().getStrings(
-        YarnConfiguration.NM_LOCAL_DIRS,
-        YarnConfiguration.DEFAULT_NM_LOCAL_DIRS);
-    for (String sLocalDir : sLocalDirs) {
+    for (String sLocalDir : localDirs) {
       Path usersdir = new Path(sLocalDir, ContainerLocalizer.USERCACHE);
       Path usersdir = new Path(sLocalDir, ContainerLocalizer.USERCACHE);
       Path userdir = new Path(usersdir, userName);
       Path userdir = new Path(usersdir, userName);
       Path appCacheDir = new Path(userdir, ContainerLocalizer.APPCACHE);
       Path appCacheDir = new Path(userdir, ContainerLocalizer.APPCACHE);
@@ -128,7 +126,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
     }
     }
 
 
     // Create the container log-dirs on all disks
     // Create the container log-dirs on all disks
-    createContainerLogDirs(appIdStr, containerIdStr);
+    createContainerLogDirs(appIdStr, containerIdStr, logDirs);
 
 
     // copy launch script to work dir
     // copy launch script to work dir
     Path launchDst =
     Path launchDst =
@@ -299,9 +297,9 @@ public class DefaultContainerExecutor extends ContainerExecutor {
    * $logdir/$user/$appId */
    * $logdir/$user/$appId */
   private static final short LOGDIR_PERM = (short)0710;
   private static final short LOGDIR_PERM = (short)0710;
 
 
-  private Path getFirstApplicationDir(List<Path> localDirs, String user,
+  private Path getFirstApplicationDir(List<String> localDirs, String user,
       String appId) {
       String appId) {
-    return getApplicationDir(localDirs.get(0), user, appId);
+    return getApplicationDir(new Path(localDirs.get(0)), user, appId);
   }
   }
 
 
   private Path getApplicationDir(Path base, String user, String appId) {
   private Path getApplicationDir(Path base, String user, String appId) {
@@ -328,14 +326,14 @@ public class DefaultContainerExecutor extends ContainerExecutor {
    * <li>$local.dir/usercache/$user</li>
    * <li>$local.dir/usercache/$user</li>
    * </ul>
    * </ul>
    */
    */
-  private void createUserLocalDirs(List<Path> localDirs, String user)
+  private void createUserLocalDirs(List<String> localDirs, String user)
       throws IOException {
       throws IOException {
     boolean userDirStatus = false;
     boolean userDirStatus = false;
     FsPermission userperms = new FsPermission(USER_PERM);
     FsPermission userperms = new FsPermission(USER_PERM);
-    for (Path localDir : localDirs) {
+    for (String localDir : localDirs) {
       // create $local.dir/usercache/$user and its immediate parent
       // create $local.dir/usercache/$user and its immediate parent
       try {
       try {
-        lfs.mkdir(getUserCacheDir(localDir, user), userperms, true);
+        lfs.mkdir(getUserCacheDir(new Path(localDir), user), userperms, true);
       } catch (IOException e) {
       } catch (IOException e) {
         LOG.warn("Unable to create the user directory : " + localDir, e);
         LOG.warn("Unable to create the user directory : " + localDir, e);
         continue;
         continue;
@@ -357,7 +355,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
    * <li>$local.dir/usercache/$user/filecache</li>
    * <li>$local.dir/usercache/$user/filecache</li>
    * </ul>
    * </ul>
    */
    */
-  private void createUserCacheDirs(List<Path> localDirs, String user)
+  private void createUserCacheDirs(List<String> localDirs, String user)
       throws IOException {
       throws IOException {
     LOG.info("Initializing user " + user);
     LOG.info("Initializing user " + user);
 
 
@@ -366,9 +364,10 @@ public class DefaultContainerExecutor extends ContainerExecutor {
     FsPermission appCachePerms = new FsPermission(APPCACHE_PERM);
     FsPermission appCachePerms = new FsPermission(APPCACHE_PERM);
     FsPermission fileperms = new FsPermission(FILECACHE_PERM);
     FsPermission fileperms = new FsPermission(FILECACHE_PERM);
 
 
-    for (Path localDir : localDirs) {
+    for (String localDir : localDirs) {
       // create $local.dir/usercache/$user/appcache
       // create $local.dir/usercache/$user/appcache
-      final Path appDir = getAppcacheDir(localDir, user);
+      Path localDirPath = new Path(localDir);
+      final Path appDir = getAppcacheDir(localDirPath, user);
       try {
       try {
         lfs.mkdir(appDir, appCachePerms, true);
         lfs.mkdir(appDir, appCachePerms, true);
         appcacheDirStatus = true;
         appcacheDirStatus = true;
@@ -376,7 +375,7 @@ public class DefaultContainerExecutor extends ContainerExecutor {
         LOG.warn("Unable to create app cache directory : " + appDir, e);
         LOG.warn("Unable to create app cache directory : " + appDir, e);
       }
       }
       // create $local.dir/usercache/$user/filecache
       // create $local.dir/usercache/$user/filecache
-      final Path distDir = getFileCacheDir(localDir, user);
+      final Path distDir = getFileCacheDir(localDirPath, user);
       try {
       try {
         lfs.mkdir(distDir, fileperms, true);
         lfs.mkdir(distDir, fileperms, true);
         distributedCacheDirStatus = true;
         distributedCacheDirStatus = true;
@@ -403,12 +402,12 @@ public class DefaultContainerExecutor extends ContainerExecutor {
    * </ul>
    * </ul>
    * @param localDirs 
    * @param localDirs 
    */
    */
-  private void createAppDirs(List<Path> localDirs, String user, String appId)
+  private void createAppDirs(List<String> localDirs, String user, String appId)
       throws IOException {
       throws IOException {
     boolean initAppDirStatus = false;
     boolean initAppDirStatus = false;
     FsPermission appperms = new FsPermission(APPDIR_PERM);
     FsPermission appperms = new FsPermission(APPDIR_PERM);
-    for (Path localDir : localDirs) {
-      Path fullAppDir = getApplicationDir(localDir, user, appId);
+    for (String localDir : localDirs) {
+      Path fullAppDir = getApplicationDir(new Path(localDir), user, appId);
       // create $local.dir/usercache/$user/appcache/$appId
       // create $local.dir/usercache/$user/appcache/$appId
       try {
       try {
         lfs.mkdir(fullAppDir, appperms, true);
         lfs.mkdir(fullAppDir, appperms, true);
@@ -427,15 +426,12 @@ public class DefaultContainerExecutor extends ContainerExecutor {
   /**
   /**
    * Create application log directories on all disks.
    * Create application log directories on all disks.
    */
    */
-  private void createAppLogDirs(String appId)
+  private void createAppLogDirs(String appId, List<String> logDirs)
       throws IOException {
       throws IOException {
-    String[] rootLogDirs =
-        getConf()
-            .getStrings(YarnConfiguration.NM_LOG_DIRS, YarnConfiguration.DEFAULT_NM_LOG_DIRS);
-    
+
     boolean appLogDirStatus = false;
     boolean appLogDirStatus = false;
     FsPermission appLogDirPerms = new FsPermission(LOGDIR_PERM);
     FsPermission appLogDirPerms = new FsPermission(LOGDIR_PERM);
-    for (String rootLogDir : rootLogDirs) {
+    for (String rootLogDir : logDirs) {
       // create $log.dir/$appid
       // create $log.dir/$appid
       Path appLogDir = new Path(rootLogDir, appId);
       Path appLogDir = new Path(rootLogDir, appId);
       try {
       try {
@@ -455,15 +451,12 @@ public class DefaultContainerExecutor extends ContainerExecutor {
   /**
   /**
    * Create application log directories on all disks.
    * Create application log directories on all disks.
    */
    */
-  private void createContainerLogDirs(String appId, String containerId)
-      throws IOException {
-    String[] rootLogDirs =
-        getConf()
-            .getStrings(YarnConfiguration.NM_LOG_DIRS, YarnConfiguration.DEFAULT_NM_LOG_DIRS);
-    
+  private void createContainerLogDirs(String appId, String containerId,
+      List<String> logDirs) throws IOException {
+
     boolean containerLogDirStatus = false;
     boolean containerLogDirStatus = false;
     FsPermission containerLogDirPerms = new FsPermission(LOGDIR_PERM);
     FsPermission containerLogDirPerms = new FsPermission(LOGDIR_PERM);
-    for (String rootLogDir : rootLogDirs) {
+    for (String rootLogDir : logDirs) {
       // create $log.dir/$appid/$containerid
       // create $log.dir/$appid/$containerid
       Path appLogDir = new Path(rootLogDir, appId);
       Path appLogDir = new Path(rootLogDir, appId);
       Path containerLogDir = new Path(appLogDir, containerId);
       Path containerLogDir = new Path(appLogDir, containerId);
@@ -483,4 +476,15 @@ public class DefaultContainerExecutor extends ContainerExecutor {
               + containerId);
               + containerId);
     }
     }
   }
   }
+
+  /**
+   * @return the list of paths of given local directories
+   */
+  private static List<Path> getPaths(List<String> dirs) {
+    List<Path> paths = new ArrayList<Path>(dirs.size());
+    for (int i = 0; i < dirs.size(); i++) {
+      paths.add(new Path(dirs.get(i)));
+    }
+    return paths;
+  }
 }
 }

+ 96 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java

@@ -0,0 +1,96 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.DiskChecker;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
+
+/**
+ * Manages a list of local storage directories.
+ */
+class DirectoryCollection {
+  private static final Log LOG = LogFactory.getLog(DirectoryCollection.class);
+
+  // Good local storage directories
+  private List<String> localDirs;
+  private List<String> failedDirs;
+  private int numFailures;
+
+  public DirectoryCollection(String[] dirs) {
+    localDirs = new ArrayList<String>();
+    localDirs.addAll(Arrays.asList(dirs));
+    failedDirs = new ArrayList<String>();
+  }
+
+  /**
+   * @return the current valid directories 
+   */
+  synchronized List<String> getGoodDirs() {
+    return localDirs;
+  }
+
+  /**
+   * @return the failed directories
+   */
+  synchronized List<String> getFailedDirs() {
+    return failedDirs;
+  }
+
+  /**
+   * @return the total number of directory failures seen till now
+   */
+  synchronized int getNumFailures() {
+    return numFailures;
+  }
+
+  /**
+   * Check the health of current set of local directories, updating the list
+   * of valid directories if necessary.
+   * @return <em>true</em> if there is a new disk-failure identified in
+   *         this checking. <em>false</em> otherwise.
+   */
+  synchronized boolean checkDirs() {
+    int oldNumFailures = numFailures;
+    ListIterator<String> it = localDirs.listIterator();
+    while (it.hasNext()) {
+      final String dir = it.next();
+      try {
+        DiskChecker.checkDir(new File(dir));
+      } catch (DiskErrorException de) {
+        LOG.warn("Directory " + dir + " error " +
+            de.getMessage() + ", removing from the list of valid directories.");
+        it.remove();
+        failedDirs.add(dir);
+        numFailures++;
+      }
+    }
+    if (numFailures > oldNumFailures) {
+      return true;
+    }
+    return false;
+  }
+}

+ 15 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

@@ -126,13 +126,18 @@ public class LinuxContainerExecutor extends ContainerExecutor {
   @Override
   @Override
   public void startLocalizer(Path nmPrivateContainerTokensPath,
   public void startLocalizer(Path nmPrivateContainerTokensPath,
       InetSocketAddress nmAddr, String user, String appId, String locId,
       InetSocketAddress nmAddr, String user, String appId, String locId,
-      List<Path> localDirs) throws IOException, InterruptedException {
+      List<String> localDirs, List<String> logDirs)
+      throws IOException, InterruptedException {
+
     List<String> command = new ArrayList<String>(
     List<String> command = new ArrayList<String>(
       Arrays.asList(containerExecutorExe, 
       Arrays.asList(containerExecutorExe, 
                     user, 
                     user, 
                     Integer.toString(Commands.INITIALIZE_CONTAINER.getValue()),
                     Integer.toString(Commands.INITIALIZE_CONTAINER.getValue()),
                     appId,
                     appId,
-                    nmPrivateContainerTokensPath.toUri().getPath().toString()));
+                    nmPrivateContainerTokensPath.toUri().getPath().toString(),
+                    StringUtils.join(",", localDirs),
+                    StringUtils.join(",", logDirs)));
+
     File jvm =                                  // use same jvm as parent
     File jvm =                                  // use same jvm as parent
       new File(new File(System.getProperty("java.home"), "bin"), "java");
       new File(new File(System.getProperty("java.home"), "bin"), "java");
     command.add(jvm.toString());
     command.add(jvm.toString());
@@ -148,8 +153,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
     command.add(locId);
     command.add(locId);
     command.add(nmAddr.getHostName());
     command.add(nmAddr.getHostName());
     command.add(Integer.toString(nmAddr.getPort()));
     command.add(Integer.toString(nmAddr.getPort()));
-    for (Path p : localDirs) {
-      command.add(p.toUri().getPath().toString());
+    for (String dir : localDirs) {
+      command.add(dir);
     }
     }
     String[] commandArray = command.toArray(new String[command.size()]);
     String[] commandArray = command.toArray(new String[command.size()]);
     ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray);
     ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray);
@@ -174,7 +179,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
   @Override
   @Override
   public int launchContainer(Container container,
   public int launchContainer(Container container,
       Path nmPrivateCotainerScriptPath, Path nmPrivateTokensPath,
       Path nmPrivateCotainerScriptPath, Path nmPrivateTokensPath,
-      String user, String appId, Path containerWorkDir) throws IOException {
+      String user, String appId, Path containerWorkDir,
+      List<String> localDirs, List<String> logDirs) throws IOException {
 
 
     ContainerId containerId = container.getContainerID();
     ContainerId containerId = container.getContainerID();
     String containerIdStr = ConverterUtils.toString(containerId);
     String containerIdStr = ConverterUtils.toString(containerId);
@@ -189,8 +195,10 @@ public class LinuxContainerExecutor extends ContainerExecutor {
                 .toString(Commands.LAUNCH_CONTAINER.getValue()), appId,
                 .toString(Commands.LAUNCH_CONTAINER.getValue()), appId,
             containerIdStr, containerWorkDir.toString(),
             containerIdStr, containerWorkDir.toString(),
             nmPrivateCotainerScriptPath.toUri().getPath().toString(),
             nmPrivateCotainerScriptPath.toUri().getPath().toString(),
-            nmPrivateTokensPath.toUri().getPath().toString(), pidFilePath
-                .toString()));
+            nmPrivateTokensPath.toUri().getPath().toString(),
+            pidFilePath.toString(),
+            StringUtils.join(",", localDirs),
+            StringUtils.join(",", logDirs)));
         String[] commandArray = command.toArray(new String[command.size()]);
         String[] commandArray = command.toArray(new String[command.size()]);
         shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd
         shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd
             container.getLaunchContext().getEnvironment()); // sanitized env
             container.getLaunchContext().getEnvironment()); // sanitized env

+ 297 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java

@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Timer;
+import java.util.TimerTask;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalDirAllocator;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.service.AbstractService;
+
+/**
+ * The class which provides functionality of checking the health of the local
+ * directories of a node. This specifically manages nodemanager-local-dirs and
+ * nodemanager-log-dirs by periodically checking their health.
+ */
+public class LocalDirsHandlerService extends AbstractService {
+
+  private static Log LOG = LogFactory.getLog(LocalDirsHandlerService.class);
+
+  /** Timer used to schedule disk health monitoring code execution */
+  private Timer dirsHandlerScheduler;
+  private long diskHealthCheckInterval;
+  private boolean isDiskHealthCheckerEnabled;
+  /**
+   * Minimum fraction of disks to be healthy for the node to be healthy in
+   * terms of disks. This applies to nm-local-dirs and nm-log-dirs.
+   */
+  private float minNeededHealthyDisksFactor;
+
+  private MonitoringTimerTask monitoringTimerTask;
+
+  /** Local dirs to store localized files in */
+  private DirectoryCollection localDirs = null;
+
+  /** storage for container logs*/
+  private DirectoryCollection logDirs = null;
+
+  /**
+   * Everybody should go through this LocalDirAllocator object for read/write
+   * of any local path corresponding to {@link YarnConfiguration#NM_LOCAL_DIRS}
+   * instead of creating his/her own LocalDirAllocator objects
+   */ 
+  private LocalDirAllocator localDirsAllocator;
+  /**
+   * Everybody should go through this LocalDirAllocator object for read/write
+   * of any local path corresponding to {@link YarnConfiguration#NM_LOG_DIRS}
+   * instead of creating his/her own LocalDirAllocator objects
+   */ 
+  private LocalDirAllocator logDirsAllocator;
+
+  /** when disk health checking code was last run */
+  private long lastDisksCheckTime;
+
+  /**
+   * Class which is used by the {@link Timer} class to periodically execute the
+   * disks' health checker code.
+   */
+  private final class MonitoringTimerTask extends TimerTask {
+
+    public MonitoringTimerTask(Configuration conf) {
+      localDirs = new DirectoryCollection(
+          conf.getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS));
+      logDirs = new DirectoryCollection(
+          conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS));
+      localDirsAllocator =
+          new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS);
+      logDirsAllocator = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS);
+    }
+
+    @Override
+    public void run() {
+      boolean newFailure = false;
+      if (localDirs.checkDirs()) {
+        newFailure = true;
+      }
+      if (logDirs.checkDirs()) {
+        newFailure = true;
+      }
+
+      if (newFailure) {
+        LOG.info("Disk(s) failed. " + getDisksHealthReport());
+        updateDirsInConfiguration();
+        if (!areDisksHealthy()) {
+          // Just log.
+          LOG.error("Most of the disks failed. " + getDisksHealthReport());
+        }
+      }
+      lastDisksCheckTime = System.currentTimeMillis();
+    }
+  }
+
+  public LocalDirsHandlerService() {
+    super(LocalDirsHandlerService.class.getName());
+  }
+
+  /**
+   * Method which initializes the timertask and its interval time.
+   */
+  @Override
+  public void init(Configuration config) {
+    // Clone the configuration as we may do modifications to dirs-list
+    Configuration conf = new Configuration(config);
+    diskHealthCheckInterval = conf.getLong(
+        YarnConfiguration.NM_DISK_HEALTH_CHECK_INTERVAL_MS,
+        YarnConfiguration.DEFAULT_NM_DISK_HEALTH_CHECK_INTERVAL_MS);
+    monitoringTimerTask = new MonitoringTimerTask(conf);
+    isDiskHealthCheckerEnabled = conf.getBoolean(
+        YarnConfiguration.NM_DISK_HEALTH_CHECK_ENABLE, true);
+    minNeededHealthyDisksFactor = conf.getFloat(
+        YarnConfiguration.NM_MIN_HEALTHY_DISKS_FRACTION,
+        YarnConfiguration.DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION);
+    lastDisksCheckTime = System.currentTimeMillis();
+    super.init(conf);
+  }
+
+  /**
+   * Method used to start the disk health monitoring, if enabled.
+   */
+  @Override
+  public void start() {
+    if (isDiskHealthCheckerEnabled) {
+      dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
+      // Start the timer task for disk health checking immediately and
+      // then run periodically at interval time.
+      dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask, 0,
+                                                   diskHealthCheckInterval);
+    }
+    super.start();
+  }
+
+  /**
+   * Method used to terminate the disk health monitoring service.
+   */
+  @Override
+  public void stop() {
+    if (dirsHandlerScheduler != null) {
+      dirsHandlerScheduler.cancel();
+    }
+    super.stop();
+  }
+
+  /**
+   * @return the good/valid local directories based on disks' health
+   */
+  public List<String> getLocalDirs() {
+    return localDirs.getGoodDirs();
+  }
+
+  /**
+   * @return the good/valid log directories based on disks' health
+   */
+  public List<String> getLogDirs() {
+    return logDirs.getGoodDirs();
+  }
+
+  /**
+   * @return the health report of nm-local-dirs and nm-log-dirs
+   */
+  public String getDisksHealthReport() {
+    if (!isDiskHealthCheckerEnabled) {
+      return "";
+    }
+
+    StringBuilder report = new StringBuilder();
+    List<String> failedLocalDirsList = localDirs.getFailedDirs();
+    List<String> failedLogDirsList = logDirs.getFailedDirs();
+    int numLocalDirs = localDirs.getGoodDirs().size()
+        + failedLocalDirsList.size();
+    int numLogDirs = logDirs.getGoodDirs().size() + failedLogDirsList.size();
+    if (!failedLocalDirsList.isEmpty()) {
+      report.append(failedLocalDirsList.size() + "/" + numLocalDirs
+          + " local-dirs turned bad: "
+          + StringUtils.join(",", failedLocalDirsList) + ";");
+    }
+    if (!failedLogDirsList.isEmpty()) {
+      report.append(failedLogDirsList.size() + "/" + numLogDirs
+          + " log-dirs turned bad: "
+          + StringUtils.join(",", failedLogDirsList));
+    }
+    return report.toString();
+  }
+
+  /**
+   * The minimum fraction of number of disks needed to be healthy for a node to
+   * be considered healthy in terms of disks is configured using
+   * {@link YarnConfiguration#NM_MIN_HEALTHY_DISKS_FRACTION}, with a default
+   * value of {@link YarnConfiguration#DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION}.
+   * @return <em>false</em> if either (a) more than the allowed percentage of
+   * nm-local-dirs failed or (b) more than the allowed percentage of
+   * nm-log-dirs failed.
+   */
+  public boolean areDisksHealthy() {
+    if (!isDiskHealthCheckerEnabled) {
+      return true;
+    }
+
+    int goodDirs = getLocalDirs().size();
+    int failedDirs = localDirs.getFailedDirs().size();
+    int totalConfiguredDirs = goodDirs + failedDirs;
+    if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) {
+      return false; // Not enough healthy local-dirs
+    }
+
+    goodDirs = getLogDirs().size();
+    failedDirs = logDirs.getFailedDirs().size();
+    totalConfiguredDirs = goodDirs + failedDirs;
+    if (goodDirs/(float)totalConfiguredDirs < minNeededHealthyDisksFactor) {
+      return false; // Not enough healthy log-dirs
+    }
+
+    return true;
+  }
+
+  public long getLastDisksCheckTime() {
+    return lastDisksCheckTime;
+  }
+
+  /**
+   * Set good local dirs and good log dirs in the configuration so that the
+   * LocalDirAllocator objects will use this updated configuration only.
+   */
+  private void updateDirsInConfiguration() {
+    Configuration conf = getConfig();
+    List<String> localDirs = getLocalDirs();
+    conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS,
+                    localDirs.toArray(new String[localDirs.size()]));
+    List<String> logDirs = getLogDirs();
+    synchronized(conf) {
+      conf.setStrings(YarnConfiguration.NM_LOG_DIRS,
+                      logDirs.toArray(new String[logDirs.size()]));
+    }
+  }
+
+  public Path getLocalPathForWrite(String pathStr) throws IOException {
+    Configuration conf = getConfig();
+    Path path = null;
+    synchronized (conf) {
+      path = localDirsAllocator.getLocalPathForWrite(pathStr, conf);
+    }
+    return path;
+  }
+
+  public Path getLocalPathForWrite(String pathStr, long size,
+      boolean checkWrite) throws IOException {
+    Configuration conf = getConfig();
+    Path path = null;
+    synchronized (conf) {
+      path = localDirsAllocator.getLocalPathForWrite(pathStr, size, conf,
+                                                     checkWrite);
+    }
+    return path;
+  }
+
+  public Path getLogPathForWrite(String pathStr, boolean checkWrite)
+      throws IOException {
+    Configuration conf = getConfig();
+    Path path = null;
+    synchronized (conf) {
+      path = logDirsAllocator.getLocalPathForWrite(pathStr,
+        LocalDirAllocator.SIZE_UNKNOWN, conf, checkWrite);
+    }
+    return path;
+  }
+
+  public Path getLogPathToRead(String pathStr) throws IOException {
+    Configuration conf = getConfig();
+    Path path = null;
+    synchronized (conf) {
+      path = logDirsAllocator.getLocalPathToRead(pathStr, conf);
+    }
+    return path;
+  }
+}

+ 97 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthCheckerService.java

@@ -0,0 +1,97 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.service.CompositeService;
+
+/**
+ * The class which provides functionality of checking the health of the node and
+ * reporting back to the service for which the health checker has been asked to
+ * report.
+ */
+public class NodeHealthCheckerService extends CompositeService {
+
+  private NodeHealthScriptRunner nodeHealthScriptRunner;
+  private LocalDirsHandlerService dirsHandler;
+
+  static final String SEPARATOR = ";";
+
+  public NodeHealthCheckerService() {
+    super(NodeHealthCheckerService.class.getName());
+    dirsHandler = new LocalDirsHandlerService();
+  }
+
+  @Override
+  public void init(Configuration conf) {
+    if (NodeHealthScriptRunner.shouldRun(conf)) {
+      nodeHealthScriptRunner = new NodeHealthScriptRunner();
+      addService(nodeHealthScriptRunner);
+    }
+    addService(dirsHandler);
+    super.init(conf);
+  }
+
+  /**
+   * @return the reporting string of health of the node
+   */
+  String getHealthReport() {
+    String scriptReport = (nodeHealthScriptRunner == null) ? ""
+        : nodeHealthScriptRunner.getHealthReport();
+    if (scriptReport.equals("")) {
+      return dirsHandler.getDisksHealthReport();
+    } else {
+      return scriptReport.concat(SEPARATOR + dirsHandler.getDisksHealthReport());
+    }
+  }
+
+  /**
+   * @return <em>true</em> if the node is healthy
+   */
+  boolean isHealthy() {
+    boolean scriptHealthStatus = (nodeHealthScriptRunner == null) ? true
+        : nodeHealthScriptRunner.isHealthy();
+    return scriptHealthStatus && dirsHandler.areDisksHealthy();
+  }
+
+  /**
+   * @return when the last time the node health status is reported
+   */
+  long getLastHealthReportTime() {
+    long diskCheckTime = dirsHandler.getLastDisksCheckTime();
+    long lastReportTime = (nodeHealthScriptRunner == null)
+        ? diskCheckTime
+        : Math.max(nodeHealthScriptRunner.getLastReportedTime(), diskCheckTime);
+    return lastReportTime;
+  }
+
+  /**
+   * @return the disk handler
+   */
+  public LocalDirsHandlerService getDiskHandler() {
+    return dirsHandler;
+  }
+
+  /**
+   * @return the node health script runner
+   */
+  NodeHealthScriptRunner getNodeHealthScriptRunner() {
+    return nodeHealthScriptRunner;
+  }
+}

+ 19 - 40
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/NodeHealthCheckerService.java → hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java

@@ -16,7 +16,7 @@
  * limitations under the License.
  * limitations under the License.
  */
  */
 
 
-package org.apache.hadoop;
+package org.apache.hadoop.yarn.server.nodemanager;
 
 
 import java.io.File;
 import java.io.File;
 import java.io.IOException;
 import java.io.IOException;
@@ -31,19 +31,18 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.AbstractService;
 
 
 /**
 /**
  * 
  * 
- * The class which provides functionality of checking the health of the node and
- * reporting back to the service for which the health checker has been asked to
- * report.
+ * The class which provides functionality of checking the health of the node
+ * using the configured node health script and reporting back to the service
+ * for which the health checker has been asked to report.
  */
  */
-public class NodeHealthCheckerService extends AbstractService {
+public class NodeHealthScriptRunner extends AbstractService {
 
 
-  private static Log LOG = LogFactory.getLog(NodeHealthCheckerService.class);
+  private static Log LOG = LogFactory.getLog(NodeHealthScriptRunner.class);
 
 
   /** Absolute path to the health script. */
   /** Absolute path to the health script. */
   private String nodeHealthScript;
   private String nodeHealthScript;
@@ -74,7 +73,6 @@ public class NodeHealthCheckerService extends AbstractService {
 
 
   private TimerTask timer;
   private TimerTask timer;
   
   
-  
   private enum HealthCheckerExitStatus {
   private enum HealthCheckerExitStatus {
     SUCCESS,
     SUCCESS,
     TIMED_OUT,
     TIMED_OUT,
@@ -187,18 +185,13 @@ public class NodeHealthCheckerService extends AbstractService {
     }
     }
   }
   }
 
 
-  public NodeHealthCheckerService() {
-    super(NodeHealthCheckerService.class.getName());
+  public NodeHealthScriptRunner() {
+    super(NodeHealthScriptRunner.class.getName());
     this.lastReportedTime = System.currentTimeMillis();
     this.lastReportedTime = System.currentTimeMillis();
     this.isHealthy = true;
     this.isHealthy = true;
     this.healthReport = "";    
     this.healthReport = "";    
   }
   }
 
 
-  public NodeHealthCheckerService(Configuration conf) {
-    this();
-    init(conf);
-  }
-
   /*
   /*
    * Method which initializes the values for the script path and interval time.
    * Method which initializes the values for the script path and interval time.
    */
    */
@@ -257,12 +250,12 @@ public class NodeHealthCheckerService extends AbstractService {
    * 
    * 
    * @return true if node is healthy
    * @return true if node is healthy
    */
    */
-  private boolean isHealthy() {
+  public boolean isHealthy() {
     return isHealthy;
     return isHealthy;
   }
   }
 
 
   /**
   /**
-   * Sets if the node is healhty or not.
+   * Sets if the node is healthy or not considering disks' health also.
    * 
    * 
    * @param isHealthy
    * @param isHealthy
    *          if or not node is healthy
    *          if or not node is healthy
@@ -277,13 +270,14 @@ public class NodeHealthCheckerService extends AbstractService {
    * 
    * 
    * @return output from health script
    * @return output from health script
    */
    */
-  private String getHealthReport() {
+  public String getHealthReport() {
     return healthReport;
     return healthReport;
   }
   }
 
 
   /**
   /**
-   * Sets the health report from the node health script.
-   * 
+   * Sets the health report from the node health script. Also sets the disks'
+   * health info obtained from the disk health checker service.
+   *
    * @param healthReport
    * @param healthReport
    */
    */
   private synchronized void setHealthReport(String healthReport) {
   private synchronized void setHealthReport(String healthReport) {
@@ -295,7 +289,7 @@ public class NodeHealthCheckerService extends AbstractService {
    * 
    * 
    * @return timestamp when node health script was last run
    * @return timestamp when node health script was last run
    */
    */
-  private long getLastReportedTime() {
+  public long getLastReportedTime() {
     return lastReportedTime;
     return lastReportedTime;
   }
   }
 
 
@@ -340,27 +334,12 @@ public class NodeHealthCheckerService extends AbstractService {
     this.setHealthStatus(isHealthy, output);
     this.setHealthStatus(isHealthy, output);
     this.setLastReportedTime(time);
     this.setLastReportedTime(time);
   }
   }
-  
-  /**
-   * Method to populate the fields for the {@link NodeHealthStatus}
-   * 
-   * @param healthStatus
-   */
-  public synchronized void setHealthStatus(NodeHealthStatus healthStatus) {
-    healthStatus.setIsNodeHealthy(this.isHealthy());
-    healthStatus.setHealthReport(this.getHealthReport());
-    healthStatus.setLastHealthReportTime(this.getLastReportedTime());
-  }
-  
+
   /**
   /**
-   * Test method to directly access the timer which node 
-   * health checker would use.
-   * 
-   *
-   * @return Timer task
+   * Used only by tests to access the timer task directly
+   * @return the timer task
    */
    */
-  //XXX:Not to be used directly.
-  TimerTask getTimer() {
+  TimerTask getTimerTask() {
     return timer;
     return timer;
   }
   }
 }
 }

+ 23 - 17
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -25,7 +25,6 @@ import java.util.concurrent.ConcurrentSkipListMap;
 
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthCheckerService;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.SecurityUtil;
@@ -59,6 +58,8 @@ public class NodeManager extends CompositeService implements
   protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
   protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
   protected ContainerTokenSecretManager containerTokenSecretManager;
   protected ContainerTokenSecretManager containerTokenSecretManager;
   private ApplicationACLsManager aclsManager;
   private ApplicationACLsManager aclsManager;
+  private NodeHealthCheckerService nodeHealthChecker;
+  private LocalDirsHandlerService dirsHandler;
 
 
   public NodeManager() {
   public NodeManager() {
     super(NodeManager.class.getName());
     super(NodeManager.class.getName());
@@ -78,14 +79,16 @@ public class NodeManager extends CompositeService implements
   protected ContainerManagerImpl createContainerManager(Context context,
   protected ContainerManagerImpl createContainerManager(Context context,
       ContainerExecutor exec, DeletionService del,
       ContainerExecutor exec, DeletionService del,
       NodeStatusUpdater nodeStatusUpdater, ContainerTokenSecretManager 
       NodeStatusUpdater nodeStatusUpdater, ContainerTokenSecretManager 
-      containerTokenSecretManager, ApplicationACLsManager aclsManager) {
+      containerTokenSecretManager, ApplicationACLsManager aclsManager,
+      LocalDirsHandlerService dirsHandler) {
     return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
     return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
-        metrics, containerTokenSecretManager, aclsManager);
+        metrics, containerTokenSecretManager, aclsManager, dirsHandler);
   }
   }
 
 
   protected WebServer createWebServer(Context nmContext,
   protected WebServer createWebServer(Context nmContext,
-      ResourceView resourceView, ApplicationACLsManager aclsManager) {
-    return new WebServer(nmContext, resourceView, aclsManager);
+      ResourceView resourceView, ApplicationACLsManager aclsManager,
+      LocalDirsHandlerService dirsHandler) {
+    return new WebServer(nmContext, resourceView, aclsManager, dirsHandler);
   }
   }
 
 
   protected void doSecureLogin() throws IOException {
   protected void doSecureLogin() throws IOException {
@@ -121,16 +124,12 @@ public class NodeManager extends CompositeService implements
     // NodeManager level dispatcher
     // NodeManager level dispatcher
     AsyncDispatcher dispatcher = new AsyncDispatcher();
     AsyncDispatcher dispatcher = new AsyncDispatcher();
 
 
-    NodeHealthCheckerService healthChecker = null;
-    if (NodeHealthCheckerService.shouldRun(conf)) {
-      healthChecker = new NodeHealthCheckerService();
-      addService(healthChecker);
-    }
+    nodeHealthChecker = new NodeHealthCheckerService();
+    addService(nodeHealthChecker);
+    dirsHandler = nodeHealthChecker.getDiskHandler();
 
 
-    NodeStatusUpdater nodeStatusUpdater =
-        createNodeStatusUpdater(context, dispatcher, healthChecker, 
-        this.containerTokenSecretManager);
-    
+    NodeStatusUpdater nodeStatusUpdater = createNodeStatusUpdater(context,
+        dispatcher, nodeHealthChecker, this.containerTokenSecretManager);
     nodeStatusUpdater.register(this);
     nodeStatusUpdater.register(this);
 
 
     NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor();
     NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor();
@@ -138,11 +137,11 @@ public class NodeManager extends CompositeService implements
 
 
     ContainerManagerImpl containerManager =
     ContainerManagerImpl containerManager =
         createContainerManager(context, exec, del, nodeStatusUpdater,
         createContainerManager(context, exec, del, nodeStatusUpdater,
-        this.containerTokenSecretManager, this.aclsManager);
+        this.containerTokenSecretManager, this.aclsManager, dirsHandler);
     addService(containerManager);
     addService(containerManager);
 
 
     Service webServer = createWebServer(context, containerManager
     Service webServer = createWebServer(context, containerManager
-        .getContainersMonitor(), this.aclsManager);
+        .getContainersMonitor(), this.aclsManager, dirsHandler);
     addService(webServer);
     addService(webServer);
 
 
     dispatcher.register(ContainerManagerEventType.class, containerManager);
     dispatcher.register(ContainerManagerEventType.class, containerManager);
@@ -215,7 +214,14 @@ public class NodeManager extends CompositeService implements
     }
     }
   }
   }
 
 
-  
+
+  /**
+   * @return the node health checker
+   */
+  public NodeHealthCheckerService getNodeHealthChecker() {
+    return nodeHealthChecker;
+  }
+
   @Override
   @Override
   public void stateChanged(Service service) {
   public void stateChanged(Service service) {
     // Shutdown the Nodemanager when the NodeStatusUpdater is stopped.
     // Shutdown the Nodemanager when the NodeStatusUpdater is stopped.

+ 7 - 5
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java

@@ -27,7 +27,6 @@ import java.util.Map.Entry;
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.AvroRuntimeException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthCheckerService;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -222,11 +221,14 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
         + numActiveContainers + " containers");
         + numActiveContainers + " containers");
 
 
     NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
     NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus();
-    if (this.healthChecker != null) {
-      this.healthChecker.setHealthStatus(nodeHealthStatus);
+    nodeHealthStatus.setHealthReport(healthChecker.getHealthReport());
+    nodeHealthStatus.setIsNodeHealthy(healthChecker.isHealthy());
+    nodeHealthStatus.setLastHealthReportTime(
+        healthChecker.getLastHealthReportTime());
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Node's health-status : " + nodeHealthStatus.getIsNodeHealthy()
+                + ", " + nodeHealthStatus.getHealthReport());
     }
     }
-    LOG.debug("Node's health-status : " + nodeHealthStatus.getIsNodeHealthy()
-        + ", " + nodeHealthStatus.getHealthReport());
     nodeStatus.setNodeHealthStatus(nodeHealthStatus);
     nodeStatus.setNodeHealthStatus(nodeHealthStatus);
 
 
     return nodeStatus;
     return nodeStatus;

+ 12 - 6
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -68,6 +68,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
 import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
 import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
@@ -120,7 +121,8 @@ public class ContainerManagerImpl extends CompositeService implements
   private ContainerTokenSecretManager containerTokenSecretManager;
   private ContainerTokenSecretManager containerTokenSecretManager;
 
 
   private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
   private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
-  
+
+  protected LocalDirsHandlerService dirsHandler;
   protected final AsyncDispatcher dispatcher;
   protected final AsyncDispatcher dispatcher;
   private final ApplicationACLsManager aclsManager;
   private final ApplicationACLsManager aclsManager;
 
 
@@ -129,9 +131,12 @@ public class ContainerManagerImpl extends CompositeService implements
   public ContainerManagerImpl(Context context, ContainerExecutor exec,
   public ContainerManagerImpl(Context context, ContainerExecutor exec,
       DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
       DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
       NodeManagerMetrics metrics, ContainerTokenSecretManager 
       NodeManagerMetrics metrics, ContainerTokenSecretManager 
-      containerTokenSecretManager, ApplicationACLsManager aclsManager) {
+      containerTokenSecretManager, ApplicationACLsManager aclsManager,
+      LocalDirsHandlerService dirsHandler) {
     super(ContainerManagerImpl.class.getName());
     super(ContainerManagerImpl.class.getName());
     this.context = context;
     this.context = context;
+    this.dirsHandler = dirsHandler;
+
     dispatcher = new AsyncDispatcher();
     dispatcher = new AsyncDispatcher();
     this.deletionService = deletionContext;
     this.deletionService = deletionContext;
     this.metrics = metrics;
     this.metrics = metrics;
@@ -190,9 +195,10 @@ public class ContainerManagerImpl extends CompositeService implements
     if (conf.getBoolean(YarnConfiguration.NM_LOG_AGGREGATION_ENABLED,
     if (conf.getBoolean(YarnConfiguration.NM_LOG_AGGREGATION_ENABLED,
         YarnConfiguration.DEFAULT_NM_LOG_AGGREGATION_ENABLED)) {
         YarnConfiguration.DEFAULT_NM_LOG_AGGREGATION_ENABLED)) {
       return new LogAggregationService(this.dispatcher, context,
       return new LogAggregationService(this.dispatcher, context,
-          deletionService);
+          deletionService, dirsHandler);
     } else {
     } else {
-      return new NonAggregatingLogHandler(this.dispatcher, deletionService);
+      return new NonAggregatingLogHandler(this.dispatcher, deletionService,
+                                          dirsHandler);
     }
     }
   }
   }
 
 
@@ -203,12 +209,12 @@ public class ContainerManagerImpl extends CompositeService implements
   protected ResourceLocalizationService createResourceLocalizationService(
   protected ResourceLocalizationService createResourceLocalizationService(
       ContainerExecutor exec, DeletionService deletionContext) {
       ContainerExecutor exec, DeletionService deletionContext) {
     return new ResourceLocalizationService(this.dispatcher, exec,
     return new ResourceLocalizationService(this.dispatcher, exec,
-        deletionContext);
+        deletionContext, dirsHandler);
   }
   }
 
 
   protected ContainersLauncher createContainersLauncher(Context context,
   protected ContainersLauncher createContainersLauncher(Context context,
       ContainerExecutor exec) {
       ContainerExecutor exec) {
-    return new ContainersLauncher(context, this.dispatcher, exec);
+    return new ContainersLauncher(context, this.dispatcher, exec, dirsHandler);
   }
   }
 
 
   @Override
   @Override

+ 7 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerExitEvent.java

@@ -22,14 +22,20 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 
 
 public class ContainerExitEvent extends ContainerEvent {
 public class ContainerExitEvent extends ContainerEvent {
   private int exitCode;
   private int exitCode;
+  private final String diagnosticInfo;
 
 
   public ContainerExitEvent(ContainerId cID, ContainerEventType eventType,
   public ContainerExitEvent(ContainerId cID, ContainerEventType eventType,
-      int exitCode) {
+      int exitCode, String diagnosticInfo) {
     super(cID, eventType);
     super(cID, eventType);
     this.exitCode = exitCode;
     this.exitCode = exitCode;
+    this.diagnosticInfo = diagnosticInfo;
   }
   }
 
 
   public int getExitCode() {
   public int getExitCode() {
     return this.exitCode;
     return this.exitCode;
   }
   }
+
+  public String getDiagnosticInfo() {
+    return diagnosticInfo;
+  }
 }
 }

+ 39 - 30
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java

@@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.DelayedProcessKiller;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.DelayedProcessKiller;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
@@ -78,7 +79,6 @@ public class ContainerLaunch implements Callable<Integer> {
   private final Application app;
   private final Application app;
   private final Container container;
   private final Container container;
   private final Configuration conf;
   private final Configuration conf;
-  private final LocalDirAllocator logDirsSelector;
   
   
   private volatile AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false);
   private volatile AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false);
   private volatile AtomicBoolean completed = new AtomicBoolean(false);
   private volatile AtomicBoolean completed = new AtomicBoolean(false);
@@ -88,14 +88,17 @@ public class ContainerLaunch implements Callable<Integer> {
 
 
   private Path pidFilePath = null;
   private Path pidFilePath = null;
 
 
+  private final LocalDirsHandlerService dirsHandler;
+
   public ContainerLaunch(Configuration configuration, Dispatcher dispatcher,
   public ContainerLaunch(Configuration configuration, Dispatcher dispatcher,
-      ContainerExecutor exec, Application app, Container container) {
+      ContainerExecutor exec, Application app, Container container,
+      LocalDirsHandlerService dirsHandler) {
     this.conf = configuration;
     this.conf = configuration;
     this.app = app;
     this.app = app;
     this.exec = exec;
     this.exec = exec;
     this.container = container;
     this.container = container;
     this.dispatcher = dispatcher;
     this.dispatcher = dispatcher;
-    this.logDirsSelector = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS);
+    this.dirsHandler = dirsHandler;
     this.sleepDelayBeforeSigKill =
     this.sleepDelayBeforeSigKill =
         conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
         conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
             YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS);
             YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS);
@@ -121,9 +124,8 @@ public class ContainerLaunch implements Callable<Integer> {
       List<String> newCmds = new ArrayList<String>(command.size());
       List<String> newCmds = new ArrayList<String>(command.size());
       String appIdStr = app.getAppId().toString();
       String appIdStr = app.getAppId().toString();
       Path containerLogDir =
       Path containerLogDir =
-          this.logDirsSelector.getLocalPathForWrite(ContainerLaunch
-              .getRelativeContainerLogDir(appIdStr, containerIdStr),
-              LocalDirAllocator.SIZE_UNKNOWN, this.conf, false);
+          dirsHandler.getLogPathForWrite(ContainerLaunch
+              .getRelativeContainerLogDir(appIdStr, containerIdStr), false);
       for (String str : command) {
       for (String str : command) {
         // TODO: Should we instead work via symlinks without this grammar?
         // TODO: Should we instead work via symlinks without this grammar?
         newCmds.add(str.replace(ApplicationConstants.LOG_DIR_EXPANSION_VAR,
         newCmds.add(str.replace(ApplicationConstants.LOG_DIR_EXPANSION_VAR,
@@ -144,47 +146,49 @@ public class ContainerLaunch implements Callable<Integer> {
       // /////////////////////////// End of variable expansion
       // /////////////////////////// End of variable expansion
 
 
       FileContext lfs = FileContext.getLocalFSFileContext();
       FileContext lfs = FileContext.getLocalFSFileContext();
-      LocalDirAllocator lDirAllocator =
-          new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS); // TODO
 
 
       Path nmPrivateContainerScriptPath =
       Path nmPrivateContainerScriptPath =
-          lDirAllocator.getLocalPathForWrite(
+          dirsHandler.getLocalPathForWrite(
               getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
               getContainerPrivateDir(appIdStr, containerIdStr) + Path.SEPARATOR
-                  + CONTAINER_SCRIPT, this.conf);
+                  + CONTAINER_SCRIPT);
       Path nmPrivateTokensPath =
       Path nmPrivateTokensPath =
-          lDirAllocator.getLocalPathForWrite(
+          dirsHandler.getLocalPathForWrite(
               getContainerPrivateDir(appIdStr, containerIdStr)
               getContainerPrivateDir(appIdStr, containerIdStr)
                   + Path.SEPARATOR
                   + Path.SEPARATOR
                   + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
                   + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
-                      containerIdStr), this.conf);
+                      containerIdStr));
 
 
       DataOutputStream containerScriptOutStream = null;
       DataOutputStream containerScriptOutStream = null;
       DataOutputStream tokensOutStream = null;
       DataOutputStream tokensOutStream = null;
 
 
       // Select the working directory for the container
       // Select the working directory for the container
       Path containerWorkDir =
       Path containerWorkDir =
-          lDirAllocator.getLocalPathForWrite(ContainerLocalizer.USERCACHE
+          dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE
               + Path.SEPARATOR + user + Path.SEPARATOR
               + Path.SEPARATOR + user + Path.SEPARATOR
               + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr
               + ContainerLocalizer.APPCACHE + Path.SEPARATOR + appIdStr
               + Path.SEPARATOR + containerIdStr,
               + Path.SEPARATOR + containerIdStr,
-              LocalDirAllocator.SIZE_UNKNOWN, this.conf, false);
+              LocalDirAllocator.SIZE_UNKNOWN, false);
 
 
       String pidFileSuffix = String.format(ContainerLaunch.PID_FILE_NAME_FMT,
       String pidFileSuffix = String.format(ContainerLaunch.PID_FILE_NAME_FMT,
           containerIdStr);
           containerIdStr);
 
 
       // pid file should be in nm private dir so that it is not 
       // pid file should be in nm private dir so that it is not 
       // accessible by users
       // accessible by users
-      pidFilePath = lDirAllocator.getLocalPathForWrite(
+      pidFilePath = dirsHandler.getLocalPathForWrite(
           ResourceLocalizationService.NM_PRIVATE_DIR + Path.SEPARATOR 
           ResourceLocalizationService.NM_PRIVATE_DIR + Path.SEPARATOR 
-          + pidFileSuffix,
-          this.conf);
+          + pidFileSuffix);
+      List<String> localDirs = dirsHandler.getLocalDirs();
+      List<String> logDirs = dirsHandler.getLogDirs();
+
+      if (!dirsHandler.areDisksHealthy()) {
+        ret = ExitCode.DISKS_FAILED.getExitCode();
+        throw new IOException("Most of the disks failed. "
+            + dirsHandler.getDisksHealthReport());
+      }
 
 
       try {
       try {
         // /////////// Write out the container-script in the nmPrivate space.
         // /////////// Write out the container-script in the nmPrivate space.
-        String[] localDirs =
-            this.conf.getStrings(YarnConfiguration.NM_LOCAL_DIRS,
-                YarnConfiguration.DEFAULT_NM_LOCAL_DIRS);
-        List<Path> appDirs = new ArrayList<Path>(localDirs.length);
+        List<Path> appDirs = new ArrayList<Path>(localDirs.size());
         for (String localDir : localDirs) {
         for (String localDir : localDirs) {
           Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
           Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
           Path userdir = new Path(usersdir, user);
           Path userdir = new Path(usersdir, user);
@@ -234,30 +238,34 @@ public class ContainerLaunch implements Callable<Integer> {
       }
       }
       else {
       else {
         exec.activateContainer(containerID, pidFilePath);
         exec.activateContainer(containerID, pidFilePath);
-        ret =
-            exec.launchContainer(container, nmPrivateContainerScriptPath,
-                nmPrivateTokensPath, user, appIdStr, containerWorkDir);
+        ret = exec.launchContainer(container, nmPrivateContainerScriptPath,
+                nmPrivateTokensPath, user, appIdStr, containerWorkDir,
+                localDirs, logDirs);
       }
       }
     } catch (Throwable e) {
     } catch (Throwable e) {
-      LOG.warn("Failed to launch container", e);
+      LOG.warn("Failed to launch container.", e);
       dispatcher.getEventHandler().handle(new ContainerExitEvent(
       dispatcher.getEventHandler().handle(new ContainerExitEvent(
             launchContext.getContainerId(),
             launchContext.getContainerId(),
-            ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret));
+            ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
+            e.getMessage()));
       return ret;
       return ret;
     } finally {
     } finally {
       completed.set(true);
       completed.set(true);
       exec.deactivateContainer(containerID);
       exec.deactivateContainer(containerID);
     }
     }
 
 
-    LOG.debug("Container " + containerIdStr + " completed with exit code "
-        + ret);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Container " + containerIdStr + " completed with exit code "
+                + ret);
+    }
     if (ret == ExitCode.FORCE_KILLED.getExitCode()
     if (ret == ExitCode.FORCE_KILLED.getExitCode()
         || ret == ExitCode.TERMINATED.getExitCode()) {
         || ret == ExitCode.TERMINATED.getExitCode()) {
       // If the process was killed, Send container_cleanedup_after_kill and
       // If the process was killed, Send container_cleanedup_after_kill and
       // just break out of this method.
       // just break out of this method.
       dispatcher.getEventHandler().handle(
       dispatcher.getEventHandler().handle(
             new ContainerExitEvent(launchContext.getContainerId(),
             new ContainerExitEvent(launchContext.getContainerId(),
-                ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret));
+                ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret,
+                "Container exited with a non-zero exit code " + ret));
       return ret;
       return ret;
     }
     }
 
 
@@ -265,7 +273,8 @@ public class ContainerLaunch implements Callable<Integer> {
       LOG.warn("Container exited with a non-zero exit code " + ret);
       LOG.warn("Container exited with a non-zero exit code " + ret);
       this.dispatcher.getEventHandler().handle(new ContainerExitEvent(
       this.dispatcher.getEventHandler().handle(new ContainerExitEvent(
               launchContext.getContainerId(),
               launchContext.getContainerId(),
-              ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret));
+              ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret,
+              "Container exited with a non-zero exit code " + ret));
       return ret;
       return ret;
     }
     }
 
 

+ 12 - 5
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java

@@ -33,10 +33,10 @@ import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.UnsupportedFileSystemException;
 import org.apache.hadoop.fs.UnsupportedFileSystemException;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
@@ -59,6 +59,8 @@ public class ContainersLauncher extends AbstractService
   private final Context context;
   private final Context context;
   private final ContainerExecutor exec;
   private final ContainerExecutor exec;
   private final Dispatcher dispatcher;
   private final Dispatcher dispatcher;
+
+  private LocalDirsHandlerService dirsHandler;
   private final ExecutorService containerLauncher =
   private final ExecutorService containerLauncher =
     Executors.newCachedThreadPool(
     Executors.newCachedThreadPool(
         new ThreadFactoryBuilder()
         new ThreadFactoryBuilder()
@@ -80,11 +82,12 @@ public class ContainersLauncher extends AbstractService
 
 
 
 
   public ContainersLauncher(Context context, Dispatcher dispatcher,
   public ContainersLauncher(Context context, Dispatcher dispatcher,
-      ContainerExecutor exec) {
+      ContainerExecutor exec, LocalDirsHandlerService dirsHandler) {
     super("containers-launcher");
     super("containers-launcher");
     this.exec = exec;
     this.exec = exec;
     this.context = context;
     this.context = context;
     this.dispatcher = dispatcher;
     this.dispatcher = dispatcher;
+    this.dirsHandler = dirsHandler;
   }
   }
 
 
   @Override
   @Override
@@ -114,15 +117,19 @@ public class ContainersLauncher extends AbstractService
         Application app =
         Application app =
           context.getApplications().get(
           context.getApplications().get(
               containerId.getApplicationAttemptId().getApplicationId());
               containerId.getApplicationAttemptId().getApplicationId());
-      ContainerLaunch launch =
-          new ContainerLaunch(getConfig(), dispatcher, exec, app,
-              event.getContainer());
+
+        ContainerLaunch launch = new ContainerLaunch(getConfig(), dispatcher,
+            exec, app, event.getContainer(), dirsHandler);
         running.put(containerId,
         running.put(containerId,
             new RunningContainer(containerLauncher.submit(launch), 
             new RunningContainer(containerLauncher.submit(launch), 
                 launch));
                 launch));
         break;
         break;
       case CLEANUP_CONTAINER:
       case CLEANUP_CONTAINER:
         RunningContainer rContainerDatum = running.remove(containerId);
         RunningContainer rContainerDatum = running.remove(containerId);
+        if (rContainerDatum == null) {
+          // Container not launched. So nothing needs to be done.
+          return;
+        }
         Future<Integer> rContainer = rContainerDatum.runningcontainer;
         Future<Integer> rContainer = rContainerDatum.runningcontainer;
         if (rContainer != null 
         if (rContainer != null 
             && !rContainer.isDone()) {
             && !rContainer.isDone()) {

+ 18 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java

@@ -45,12 +45,10 @@ import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.Credentials;
-import org.apache.hadoop.security.SecurityInfo;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResource;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
@@ -61,7 +59,6 @@ import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResour
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerSecurityInfo;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenIdentifier;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenSecretManager;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenSecretManager;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -186,16 +183,30 @@ public class ContainerLocalizer {
   }
   }
 
 
   Callable<Path> download(LocalDirAllocator lda, LocalResource rsrc,
   Callable<Path> download(LocalDirAllocator lda, LocalResource rsrc,
-      UserGroupInformation ugi) {
-    return new FSDownload(lfs, ugi, conf, lda, rsrc, new Random());
+      UserGroupInformation ugi) throws IOException {
+    Path destPath = lda.getLocalPathForWrite(".", getEstimatedSize(rsrc), conf);
+    return new FSDownload(lfs, ugi, conf, destPath, rsrc, new Random());
+  }
+
+  static long getEstimatedSize(LocalResource rsrc) {
+    if (rsrc.getSize() < 0) {
+      return -1;
+    }
+    switch (rsrc.getType()) {
+      case ARCHIVE:
+        return 5 * rsrc.getSize();
+      case FILE:
+      default:
+        return rsrc.getSize();
+    }
   }
   }
 
 
   void sleep(int duration) throws InterruptedException {
   void sleep(int duration) throws InterruptedException {
     TimeUnit.SECONDS.sleep(duration);
     TimeUnit.SECONDS.sleep(duration);
   }
   }
 
 
-  private void localizeFiles(LocalizationProtocol nodemanager, ExecutorService exec,
-      UserGroupInformation ugi) {
+  private void localizeFiles(LocalizationProtocol nodemanager,
+      ExecutorService exec, UserGroupInformation ugi) throws IOException {
     while (true) {
     while (true) {
       try {
       try {
         LocalizerStatus status = createStatus();
         LocalizerStatus status = createStatus();

+ 73 - 61
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java

@@ -57,7 +57,6 @@ import static org.apache.hadoop.fs.CreateFlag.OVERWRITE;
 import java.io.IOException;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 import java.util.List;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ConcurrentMap;
@@ -68,7 +67,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetUtils;
@@ -81,6 +79,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol;
 import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction;
@@ -125,19 +124,18 @@ public class ResourceLocalizationService extends CompositeService
   private InetSocketAddress localizationServerAddress;
   private InetSocketAddress localizationServerAddress;
   private long cacheTargetSize;
   private long cacheTargetSize;
   private long cacheCleanupPeriod;
   private long cacheCleanupPeriod;
-  private List<Path> logDirs;
-  private List<Path> localDirs;
-  private List<Path> sysDirs;
+
   private final ContainerExecutor exec;
   private final ContainerExecutor exec;
   protected final Dispatcher dispatcher;
   protected final Dispatcher dispatcher;
   private final DeletionService delService;
   private final DeletionService delService;
   private LocalizerTracker localizerTracker;
   private LocalizerTracker localizerTracker;
   private RecordFactory recordFactory;
   private RecordFactory recordFactory;
-  private final LocalDirAllocator localDirsSelector;
   private final ScheduledExecutorService cacheCleanup;
   private final ScheduledExecutorService cacheCleanup;
 
 
   private final LocalResourcesTracker publicRsrc;
   private final LocalResourcesTracker publicRsrc;
-  
+
+  private LocalDirsHandlerService dirsHandler;
+
   /**
   /**
    * Map of LocalResourceTrackers keyed by username, for private
    * Map of LocalResourceTrackers keyed by username, for private
    * resources.
    * resources.
@@ -153,12 +151,15 @@ public class ResourceLocalizationService extends CompositeService
     new ConcurrentHashMap<String,LocalResourcesTracker>();
     new ConcurrentHashMap<String,LocalResourcesTracker>();
 
 
   public ResourceLocalizationService(Dispatcher dispatcher,
   public ResourceLocalizationService(Dispatcher dispatcher,
-      ContainerExecutor exec, DeletionService delService) {
+      ContainerExecutor exec, DeletionService delService,
+      LocalDirsHandlerService dirsHandler) {
+
     super(ResourceLocalizationService.class.getName());
     super(ResourceLocalizationService.class.getName());
     this.exec = exec;
     this.exec = exec;
     this.dispatcher = dispatcher;
     this.dispatcher = dispatcher;
     this.delService = delService;
     this.delService = delService;
-    this.localDirsSelector = new LocalDirAllocator(YarnConfiguration.NM_LOCAL_DIRS);
+    this.dirsHandler = dirsHandler;
+
     this.publicRsrc = new LocalResourcesTrackerImpl(null, dispatcher);
     this.publicRsrc = new LocalResourcesTrackerImpl(null, dispatcher);
     this.cacheCleanup = new ScheduledThreadPoolExecutor(1,
     this.cacheCleanup = new ScheduledThreadPoolExecutor(1,
         new ThreadFactoryBuilder()
         new ThreadFactoryBuilder()
@@ -177,41 +178,31 @@ public class ResourceLocalizationService extends CompositeService
   @Override
   @Override
   public void init(Configuration conf) {
   public void init(Configuration conf) {
     this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
     this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
+
     try {
     try {
       // TODO queue deletions here, rather than NM init?
       // TODO queue deletions here, rather than NM init?
       FileContext lfs = getLocalFileContext(conf);
       FileContext lfs = getLocalFileContext(conf);
-      String[] sLocalDirs =
-        conf.getStrings(YarnConfiguration.NM_LOCAL_DIRS, YarnConfiguration.DEFAULT_NM_LOCAL_DIRS);
-
-      localDirs = new ArrayList<Path>(sLocalDirs.length);
-      logDirs = new ArrayList<Path>(sLocalDirs.length);
-      sysDirs = new ArrayList<Path>(sLocalDirs.length);
-      for (String sLocaldir : sLocalDirs) {
-        Path localdir = new Path(sLocaldir);
-        localDirs.add(localdir);
+      List<String> localDirs = dirsHandler.getLocalDirs();
+      for (String localDir : localDirs) {
         // $local/usercache
         // $local/usercache
-        Path userdir = new Path(localdir, ContainerLocalizer.USERCACHE);
-        lfs.mkdir(userdir, null, true);
+        Path userDir = new Path(localDir, ContainerLocalizer.USERCACHE);
+        lfs.mkdir(userDir, null, true);
         // $local/filecache
         // $local/filecache
-        Path filedir = new Path(localdir, ContainerLocalizer.FILECACHE);
-        lfs.mkdir(filedir, null, true);
+        Path fileDir = new Path(localDir, ContainerLocalizer.FILECACHE);
+        lfs.mkdir(fileDir, null, true);
         // $local/nmPrivate
         // $local/nmPrivate
-        Path sysdir = new Path(localdir, NM_PRIVATE_DIR);
-        lfs.mkdir(sysdir, NM_PRIVATE_PERM, true);
-        sysDirs.add(sysdir);
+        Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
+        lfs.mkdir(sysDir, NM_PRIVATE_PERM, true);
       }
       }
-      String[] sLogdirs = conf.getStrings(YarnConfiguration.NM_LOG_DIRS, YarnConfiguration.DEFAULT_NM_LOG_DIRS);
-      for (String sLogdir : sLogdirs) {
-        Path logdir = new Path(sLogdir);
-        logDirs.add(logdir);
-        lfs.mkdir(logdir, null, true);
+
+      List<String> logDirs = dirsHandler.getLogDirs();
+      for (String logDir : logDirs) {
+        lfs.mkdir(new Path(logDir), null, true);
       }
       }
     } catch (IOException e) {
     } catch (IOException e) {
       throw new YarnException("Failed to initialize LocalizationService", e);
       throw new YarnException("Failed to initialize LocalizationService", e);
     }
     }
-    localDirs = Collections.unmodifiableList(localDirs);
-    logDirs = Collections.unmodifiableList(logDirs);
-    sysDirs = Collections.unmodifiableList(sysDirs);
+
     cacheTargetSize =
     cacheTargetSize =
       conf.getLong(YarnConfiguration.NM_LOCALIZER_CACHE_TARGET_SIZE_MB, YarnConfiguration.DEFAULT_NM_LOCALIZER_CACHE_TARGET_SIZE_MB) << 20;
       conf.getLong(YarnConfiguration.NM_LOCALIZER_CACHE_TARGET_SIZE_MB, YarnConfiguration.DEFAULT_NM_LOCALIZER_CACHE_TARGET_SIZE_MB) << 20;
     cacheCleanupPeriod =
     cacheCleanupPeriod =
@@ -391,7 +382,7 @@ public class ResourceLocalizationService extends CompositeService
     String containerIDStr = c.toString();
     String containerIDStr = c.toString();
     String appIDStr = ConverterUtils.toString(
     String appIDStr = ConverterUtils.toString(
         c.getContainerID().getApplicationAttemptId().getApplicationId());
         c.getContainerID().getApplicationAttemptId().getApplicationId());
-    for (Path localDir : localDirs) {
+    for (String localDir : dirsHandler.getLocalDirs()) {
 
 
       // Delete the user-owned container-dir
       // Delete the user-owned container-dir
       Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
       Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
@@ -428,7 +419,7 @@ public class ResourceLocalizationService extends CompositeService
     // Delete the application directories
     // Delete the application directories
     userName = application.getUser();
     userName = application.getUser();
     appIDStr = application.toString();
     appIDStr = application.toString();
-    for (Path localDir : localDirs) {
+    for (String localDir : dirsHandler.getLocalDirs()) {
 
 
       // Delete the user-owned app-dir
       // Delete the user-owned app-dir
       Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
       Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
@@ -574,12 +565,9 @@ public class ResourceLocalizationService extends CompositeService
 
 
   class PublicLocalizer extends Thread {
   class PublicLocalizer extends Thread {
 
 
-    static final String PUBCACHE_CTXT = "public.cache.dirs";
-
     final FileContext lfs;
     final FileContext lfs;
     final Configuration conf;
     final Configuration conf;
     final ExecutorService threadPool;
     final ExecutorService threadPool;
-    final LocalDirAllocator publicDirs;
     final CompletionService<Path> queue;
     final CompletionService<Path> queue;
     final Map<Future<Path>,LocalizerResourceRequestEvent> pending;
     final Map<Future<Path>,LocalizerResourceRequestEvent> pending;
     // TODO hack to work around broken signaling
     // TODO hack to work around broken signaling
@@ -601,13 +589,23 @@ public class ResourceLocalizationService extends CompositeService
       this.conf = conf;
       this.conf = conf;
       this.pending = pending;
       this.pending = pending;
       this.attempts = attempts;
       this.attempts = attempts;
-      String[] publicFilecache = new String[localDirs.size()];
-      for (int i = 0, n = localDirs.size(); i < n; ++i) {
-        publicFilecache[i] =
-          new Path(localDirs.get(i), ContainerLocalizer.FILECACHE).toString();
-      }
-      conf.setStrings(PUBCACHE_CTXT, publicFilecache);
-      this.publicDirs = new LocalDirAllocator(PUBCACHE_CTXT);
+//      List<String> localDirs = dirsHandler.getLocalDirs();
+//      String[] publicFilecache = new String[localDirs.size()];
+//      for (int i = 0, n = localDirs.size(); i < n; ++i) {
+//        publicFilecache[i] =
+//          new Path(localDirs.get(i), ContainerLocalizer.FILECACHE).toString();
+//      }
+//      conf.setStrings(PUBCACHE_CTXT, publicFilecache);
+
+//      this.publicDirDestPath = new LocalDirAllocator(PUBCACHE_CTXT).getLocalPathForWrite(pathStr, conf);
+//      List<String> localDirs = dirsHandler.getLocalDirs();
+//      String[] publicFilecache = new String[localDirs.size()];
+//      int i = 0;
+//      for (String localDir : localDirs) {
+//        publicFilecache[i++] =
+//            new Path(localDir, ContainerLocalizer.FILECACHE).toString();
+//      }
+
       this.threadPool = threadPool;
       this.threadPool = threadPool;
       this.queue = new ExecutorCompletionService<Path>(threadPool);
       this.queue = new ExecutorCompletionService<Path>(threadPool);
     }
     }
@@ -619,11 +617,19 @@ public class ResourceLocalizationService extends CompositeService
       synchronized (attempts) {
       synchronized (attempts) {
         List<LocalizerResourceRequestEvent> sigh = attempts.get(key);
         List<LocalizerResourceRequestEvent> sigh = attempts.get(key);
         if (null == sigh) {
         if (null == sigh) {
-          pending.put(queue.submit(new FSDownload(
-                  lfs, null, conf, publicDirs,
-                  request.getResource().getRequest(), new Random())),
-              request);
-          attempts.put(key, new LinkedList<LocalizerResourceRequestEvent>());
+          LocalResource resource = request.getResource().getRequest();
+          try {
+            Path publicDirDestPath = dirsHandler.getLocalPathForWrite(
+                "." + Path.SEPARATOR + ContainerLocalizer.FILECACHE,
+                ContainerLocalizer.getEstimatedSize(resource), true);
+            pending.put(queue.submit(new FSDownload(
+                lfs, null, conf, publicDirDestPath, resource, new Random())),
+                request);
+            attempts.put(key, new LinkedList<LocalizerResourceRequestEvent>());
+          } catch (IOException e) {
+            LOG.error("Local path for public localization is not found. "
+                + " May be disks failed.", e);
+          }
         } else {
         } else {
           sigh.add(request);
           sigh.add(request);
         }
         }
@@ -844,24 +850,30 @@ public class ResourceLocalizationService extends CompositeService
     public void run() {
     public void run() {
       Path nmPrivateCTokensPath = null;
       Path nmPrivateCTokensPath = null;
       try {
       try {
-        // Use LocalDirAllocator to get nmPrivateDir
+        // Get nmPrivateDir
         nmPrivateCTokensPath =
         nmPrivateCTokensPath =
-            localDirsSelector.getLocalPathForWrite(
-                NM_PRIVATE_DIR
-                    + Path.SEPARATOR
+          dirsHandler.getLocalPathForWrite(
+                NM_PRIVATE_DIR + Path.SEPARATOR
                     + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
                     + String.format(ContainerLocalizer.TOKEN_FILE_NAME_FMT,
-                        localizerId), getConfig());
+                        localizerId));
 
 
         // 0) init queue, etc.
         // 0) init queue, etc.
         // 1) write credentials to private dir
         // 1) write credentials to private dir
         writeCredentials(nmPrivateCTokensPath);
         writeCredentials(nmPrivateCTokensPath);
         // 2) exec initApplication and wait
         // 2) exec initApplication and wait
-        exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
-            context.getUser(),
-            ConverterUtils.toString(
-                context.getContainerId().
-                    getApplicationAttemptId().getApplicationId()),
-            localizerId, localDirs);
+        List<String> localDirs = dirsHandler.getLocalDirs();
+        List<String> logDirs = dirsHandler.getLogDirs();
+        if (dirsHandler.areDisksHealthy()) {
+          exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
+              context.getUser(),
+              ConverterUtils.toString(
+                  context.getContainerId().
+                  getApplicationAttemptId().getApplicationId()),
+              localizerId, localDirs, logDirs);
+        } else {
+          throw new IOException("All disks failed. "
+              + dirsHandler.getDisksHealthReport());
+        }
       // TODO handle ExitCodeException separately?
       // TODO handle ExitCodeException separately?
       } catch (Exception e) {
       } catch (Exception e) {
         LOG.info("Localizer failed", e);
         LOG.info("Localizer failed", e);

+ 14 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregatio
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
 import java.security.PrivilegedExceptionAction;
+import java.util.List;
 import java.util.Map;
 import java.util.Map;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
@@ -31,6 +32,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -40,10 +42,12 @@ import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey;
 import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
 import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
 import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogWriter;
 import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogWriter;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 
 
+
 public class AppLogAggregatorImpl implements AppLogAggregator {
 public class AppLogAggregatorImpl implements AppLogAggregator {
 
 
   private static final Log LOG = LogFactory
   private static final Log LOG = LogFactory
@@ -51,6 +55,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
   private static final int THREAD_SLEEP_TIME = 1000;
   private static final int THREAD_SLEEP_TIME = 1000;
   private static final String TMP_FILE_SUFFIX = ".tmp";
   private static final String TMP_FILE_SUFFIX = ".tmp";
 
 
+  private final LocalDirsHandlerService dirsHandler;
   private final Dispatcher dispatcher;
   private final Dispatcher dispatcher;
   private final ApplicationId appId;
   private final ApplicationId appId;
   private final String applicationId;
   private final String applicationId;
@@ -58,7 +63,6 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
   private final Configuration conf;
   private final Configuration conf;
   private final DeletionService delService;
   private final DeletionService delService;
   private final UserGroupInformation userUgi;
   private final UserGroupInformation userUgi;
-  private final String[] rootLogDirs;
   private final Path remoteNodeLogFileForApp;
   private final Path remoteNodeLogFileForApp;
   private final Path remoteNodeTmpLogFileForApp;
   private final Path remoteNodeTmpLogFileForApp;
   private final ContainerLogsRetentionPolicy retentionPolicy;
   private final ContainerLogsRetentionPolicy retentionPolicy;
@@ -72,7 +76,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
 
 
   public AppLogAggregatorImpl(Dispatcher dispatcher,
   public AppLogAggregatorImpl(Dispatcher dispatcher,
       DeletionService deletionService, Configuration conf, ApplicationId appId,
       DeletionService deletionService, Configuration conf, ApplicationId appId,
-      UserGroupInformation userUgi, String[] localRootLogDirs,
+      UserGroupInformation userUgi, LocalDirsHandlerService dirsHandler,
       Path remoteNodeLogFileForApp,
       Path remoteNodeLogFileForApp,
       ContainerLogsRetentionPolicy retentionPolicy,
       ContainerLogsRetentionPolicy retentionPolicy,
       Map<ApplicationAccessType, String> appAcls) {
       Map<ApplicationAccessType, String> appAcls) {
@@ -82,7 +86,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
     this.appId = appId;
     this.appId = appId;
     this.applicationId = ConverterUtils.toString(appId);
     this.applicationId = ConverterUtils.toString(appId);
     this.userUgi = userUgi;
     this.userUgi = userUgi;
-    this.rootLogDirs = localRootLogDirs;
+    this.dirsHandler = dirsHandler;
     this.remoteNodeLogFileForApp = remoteNodeLogFileForApp;
     this.remoteNodeLogFileForApp = remoteNodeLogFileForApp;
     this.remoteNodeTmpLogFileForApp = getRemoteNodeTmpLogFileForApp();
     this.remoteNodeTmpLogFileForApp = getRemoteNodeTmpLogFileForApp();
     this.retentionPolicy = retentionPolicy;
     this.retentionPolicy = retentionPolicy;
@@ -115,9 +119,11 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
       }
       }
     }
     }
 
 
-    LOG.info("Uploading logs for container " + containerId);
+    LOG.info("Uploading logs for container " + containerId
+        + ". Current good log dirs are "
+        + StringUtils.join(",", dirsHandler.getLogDirs()));
     LogKey logKey = new LogKey(containerId);
     LogKey logKey = new LogKey(containerId);
-    LogValue logValue = new LogValue(this.rootLogDirs, containerId);
+    LogValue logValue = new LogValue(dirsHandler.getLogDirs(), containerId);
     try {
     try {
       this.writer.append(logKey, logValue);
       this.writer.append(logKey, logValue);
     } catch (IOException e) {
     } catch (IOException e) {
@@ -150,9 +156,10 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
     }
     }
 
 
     // Remove the local app-log-dirs
     // Remove the local app-log-dirs
-    Path[] localAppLogDirs = new Path[this.rootLogDirs.length];
+    List<String> rootLogDirs = dirsHandler.getLogDirs();
+    Path[] localAppLogDirs = new Path[rootLogDirs.size()];
     int index = 0;
     int index = 0;
-    for (String rootLogDir : this.rootLogDirs) {
+    for (String rootLogDir : rootLogDirs) {
       localAppLogDirs[index] = new Path(rootLogDir, this.applicationId);
       localAppLogDirs[index] = new Path(rootLogDir, this.applicationId);
       index++;
       index++;
     }
     }

+ 8 - 8
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java

@@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy;
 import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
 import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
@@ -85,7 +86,7 @@ public class LogAggregationService extends AbstractService implements
   private final DeletionService deletionService;
   private final DeletionService deletionService;
   private final Dispatcher dispatcher;
   private final Dispatcher dispatcher;
 
 
-  private String[] localRootLogDirs;
+  private LocalDirsHandlerService dirsHandler;
   Path remoteRootLogDir;
   Path remoteRootLogDir;
   String remoteRootLogDirSuffix;
   String remoteRootLogDirSuffix;
   private NodeId nodeId;
   private NodeId nodeId;
@@ -95,11 +96,12 @@ public class LogAggregationService extends AbstractService implements
   private final ExecutorService threadPool;
   private final ExecutorService threadPool;
 
 
   public LogAggregationService(Dispatcher dispatcher, Context context,
   public LogAggregationService(Dispatcher dispatcher, Context context,
-      DeletionService deletionService) {
+      DeletionService deletionService, LocalDirsHandlerService dirsHandler) {
     super(LogAggregationService.class.getName());
     super(LogAggregationService.class.getName());
     this.dispatcher = dispatcher;
     this.dispatcher = dispatcher;
     this.context = context;
     this.context = context;
     this.deletionService = deletionService;
     this.deletionService = deletionService;
+    this.dirsHandler = dirsHandler;
     this.appLogAggregators =
     this.appLogAggregators =
         new ConcurrentHashMap<ApplicationId, AppLogAggregator>();
         new ConcurrentHashMap<ApplicationId, AppLogAggregator>();
     this.threadPool = Executors.newCachedThreadPool(
     this.threadPool = Executors.newCachedThreadPool(
@@ -109,9 +111,6 @@ public class LogAggregationService extends AbstractService implements
   }
   }
 
 
   public synchronized void init(Configuration conf) {
   public synchronized void init(Configuration conf) {
-    this.localRootLogDirs =
-        conf.getStrings(YarnConfiguration.NM_LOG_DIRS,
-            YarnConfiguration.DEFAULT_NM_LOG_DIRS);
     this.remoteRootLogDir =
     this.remoteRootLogDir =
         new Path(conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
         new Path(conf.get(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
             YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
             YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
@@ -291,9 +290,10 @@ public class LogAggregationService extends AbstractService implements
 
 
     // New application
     // New application
     AppLogAggregator appLogAggregator =
     AppLogAggregator appLogAggregator =
-        new AppLogAggregatorImpl(this.dispatcher, this.deletionService, getConfig(), appId,
-            userUgi, this.localRootLogDirs, 
-            getRemoteNodeLogFileForApp(appId, user), logRetentionPolicy, appAcls);
+        new AppLogAggregatorImpl(this.dispatcher, this.deletionService,
+            getConfig(), appId, userUgi, dirsHandler,
+            getRemoteNodeLogFileForApp(appId, user), logRetentionPolicy,
+            appAcls);
     if (this.appLogAggregators.putIfAbsent(appId, appLogAggregator) != null) {
     if (this.appLogAggregators.putIfAbsent(appId, appLogAggregator) != null) {
       throw new YarnException("Duplicate initApp for " + appId);
       throw new YarnException("Duplicate initApp for " + appId);
     }
     }

+ 9 - 8
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/NonAggregatingLogHandler.java

@@ -17,6 +17,7 @@
  */
  */
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
 package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
 
 
+import java.util.List;
 import java.util.Map;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
@@ -31,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
@@ -53,15 +55,16 @@ public class NonAggregatingLogHandler extends AbstractService implements
   private final DeletionService delService;
   private final DeletionService delService;
   private final Map<ApplicationId, String> appOwners;
   private final Map<ApplicationId, String> appOwners;
 
 
-  private String[] rootLogDirs;
+  private final LocalDirsHandlerService dirsHandler;
   private long deleteDelaySeconds;
   private long deleteDelaySeconds;
   private ScheduledThreadPoolExecutor sched;
   private ScheduledThreadPoolExecutor sched;
 
 
   public NonAggregatingLogHandler(Dispatcher dispatcher,
   public NonAggregatingLogHandler(Dispatcher dispatcher,
-      DeletionService delService) {
+      DeletionService delService, LocalDirsHandlerService dirsHandler) {
     super(NonAggregatingLogHandler.class.getName());
     super(NonAggregatingLogHandler.class.getName());
     this.dispatcher = dispatcher;
     this.dispatcher = dispatcher;
     this.delService = delService;
     this.delService = delService;
+    this.dirsHandler = dirsHandler;
     this.appOwners = new ConcurrentHashMap<ApplicationId, String>();
     this.appOwners = new ConcurrentHashMap<ApplicationId, String>();
   }
   }
 
 
@@ -70,9 +73,6 @@ public class NonAggregatingLogHandler extends AbstractService implements
     // Default 3 hours.
     // Default 3 hours.
     this.deleteDelaySeconds =
     this.deleteDelaySeconds =
         conf.getLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 3 * 60 * 60);
         conf.getLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 3 * 60 * 60);
-    this.rootLogDirs =
-        conf.getStrings(YarnConfiguration.NM_LOG_DIRS,
-            YarnConfiguration.DEFAULT_NM_LOG_DIRS);
     sched = createScheduledThreadPoolExecutor(conf);
     sched = createScheduledThreadPoolExecutor(conf);
     super.init(conf);
     super.init(conf);
   }
   }
@@ -145,10 +145,11 @@ public class NonAggregatingLogHandler extends AbstractService implements
     @Override
     @Override
     @SuppressWarnings("unchecked")
     @SuppressWarnings("unchecked")
     public void run() {
     public void run() {
-      Path[] localAppLogDirs =
-          new Path[NonAggregatingLogHandler.this.rootLogDirs.length];
+      List<String> rootLogDirs =
+          NonAggregatingLogHandler.this.dirsHandler.getLogDirs();
+      Path[] localAppLogDirs = new Path[rootLogDirs.size()];
       int index = 0;
       int index = 0;
-      for (String rootLogDir : NonAggregatingLogHandler.this.rootLogDirs) {
+      for (String rootLogDir : rootLogDirs) {
         localAppLogDirs[index] = new Path(rootLogDir, applicationId.toString());
         localAppLogDirs[index] = new Path(rootLogDir, applicationId.toString());
         index++;
         index++;
       }
       }

+ 15 - 17
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java

@@ -34,15 +34,14 @@ import java.util.EnumSet;
 import java.util.List;
 import java.util.List;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
@@ -87,17 +86,18 @@ public class ContainerLogsPage extends NMView {
   public static class ContainersLogsBlock extends HtmlBlock implements
   public static class ContainersLogsBlock extends HtmlBlock implements
       YarnWebParams {    
       YarnWebParams {    
     private final Configuration conf;
     private final Configuration conf;
-    private final LocalDirAllocator logsSelector;
     private final Context nmContext;
     private final Context nmContext;
     private final ApplicationACLsManager aclsManager;
     private final ApplicationACLsManager aclsManager;
+    private final LocalDirsHandlerService dirsHandler;
 
 
     @Inject
     @Inject
     public ContainersLogsBlock(Configuration conf, Context context,
     public ContainersLogsBlock(Configuration conf, Context context,
-        ApplicationACLsManager aclsManager) {
+        ApplicationACLsManager aclsManager,
+        LocalDirsHandlerService dirsHandler) {
       this.conf = conf;
       this.conf = conf;
-      this.logsSelector = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS);
       this.nmContext = context;
       this.nmContext = context;
       this.aclsManager = aclsManager;
       this.aclsManager = aclsManager;
+      this.dirsHandler = dirsHandler;
     }
     }
 
 
     @Override
     @Override
@@ -198,11 +198,10 @@ public class ContainerLogsPage extends NMView {
         File logFile = null;
         File logFile = null;
         try {
         try {
           logFile =
           logFile =
-              new File(this.logsSelector
-                  .getLocalPathToRead(
-                      ContainerLaunch.getRelativeContainerLogDir(
-                          applicationId.toString(), containerId.toString())
-                          + Path.SEPARATOR + $(CONTAINER_LOG_TYPE), this.conf)
+              new File(this.dirsHandler.getLogPathToRead(
+                  ContainerLaunch.getRelativeContainerLogDir(
+                  applicationId.toString(), containerId.toString())
+                  + Path.SEPARATOR + $(CONTAINER_LOG_TYPE))
                   .toUri().getPath());
                   .toUri().getPath());
         } catch (Exception e) {
         } catch (Exception e) {
           html.h1("Cannot find this log on the local disk.");
           html.h1("Cannot find this log on the local disk.");
@@ -272,8 +271,8 @@ public class ContainerLogsPage extends NMView {
         }
         }
       } else {
       } else {
         // Just print out the log-types
         // Just print out the log-types
-        List<File> containerLogsDirs =
-            getContainerLogDirs(this.conf, containerId);
+        List<File> containerLogsDirs = getContainerLogDirs(containerId,
+            dirsHandler);
         boolean foundLogFile = false;
         boolean foundLogFile = false;
         for (File containerLogsDir : containerLogsDirs) {
         for (File containerLogsDir : containerLogsDirs) {
           for (File logFile : containerLogsDir.listFiles()) {
           for (File logFile : containerLogsDir.listFiles()) {
@@ -293,11 +292,10 @@ public class ContainerLogsPage extends NMView {
       return;
       return;
     }
     }
 
 
-    static List<File>
-        getContainerLogDirs(Configuration conf, ContainerId containerId) {
-      String[] logDirs = conf.getStrings(YarnConfiguration.NM_LOG_DIRS,
-          YarnConfiguration.DEFAULT_NM_LOG_DIRS);
-      List<File> containerLogDirs = new ArrayList<File>(logDirs.length);
+    static List<File> getContainerLogDirs(ContainerId containerId,
+            LocalDirsHandlerService dirsHandler) {
+      List<String> logDirs = dirsHandler.getLogDirs();
+      List<File> containerLogDirs = new ArrayList<File>(logDirs.size());
       for (String logDir : logDirs) {
       for (String logDir : logDirs) {
         String appIdStr = 
         String appIdStr = 
             ConverterUtils.toString(
             ConverterUtils.toString(

+ 9 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.ResourceView;
 import org.apache.hadoop.yarn.server.nodemanager.ResourceView;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.AbstractService;
@@ -42,10 +43,11 @@ public class WebServer extends AbstractService {
   private WebApp webApp;
   private WebApp webApp;
 
 
   public WebServer(Context nmContext, ResourceView resourceView,
   public WebServer(Context nmContext, ResourceView resourceView,
-      ApplicationACLsManager aclsManager) {
+      ApplicationACLsManager aclsManager,
+      LocalDirsHandlerService dirsHandler) {
     super(WebServer.class.getName());
     super(WebServer.class.getName());
     this.nmContext = nmContext;
     this.nmContext = nmContext;
-    this.nmWebApp = new NMWebApp(resourceView, aclsManager);
+    this.nmWebApp = new NMWebApp(resourceView, aclsManager, dirsHandler);
   }
   }
 
 
   @Override
   @Override
@@ -81,17 +83,21 @@ public class WebServer extends AbstractService {
 
 
     private final ResourceView resourceView;
     private final ResourceView resourceView;
     private final ApplicationACLsManager aclsManager;
     private final ApplicationACLsManager aclsManager;
+    private final LocalDirsHandlerService dirsHandler;
 
 
     public NMWebApp(ResourceView resourceView,
     public NMWebApp(ResourceView resourceView,
-        ApplicationACLsManager aclsManager) {
+        ApplicationACLsManager aclsManager,
+        LocalDirsHandlerService dirsHandler) {
       this.resourceView = resourceView;
       this.resourceView = resourceView;
       this.aclsManager = aclsManager;
       this.aclsManager = aclsManager;
+      this.dirsHandler = dirsHandler;
     }
     }
 
 
     @Override
     @Override
     public void setup() {
     public void setup() {
       bind(ResourceView.class).toInstance(this.resourceView);
       bind(ResourceView.class).toInstance(this.resourceView);
       bind(ApplicationACLsManager.class).toInstance(this.aclsManager);
       bind(ApplicationACLsManager.class).toInstance(this.aclsManager);
+      bind(LocalDirsHandlerService.class).toInstance(dirsHandler);
       route("/", NMController.class, "info");
       route("/", NMController.class, "info");
       route("/node", NMController.class, "node");
       route("/node", NMController.class, "node");
       route("/allApplications", NMController.class, "allApplications");
       route("/allApplications", NMController.class, "allApplications");

+ 9 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c

@@ -261,8 +261,15 @@ char * get_value(const char* key) {
  * Value delimiter is assumed to be a comma.
  * Value delimiter is assumed to be a comma.
  */
  */
 char ** get_values(const char * key) {
 char ** get_values(const char * key) {
-  char ** toPass = NULL;
   char *value = get_value(key);
   char *value = get_value(key);
+  return extract_values(value);
+}
+
+/**
+ * Extracts array of values from the comma separated list of values.
+ */
+char ** extract_values(char *value) {
+  char ** toPass = NULL;
   char *tempTok = NULL;
   char *tempTok = NULL;
   char *tempstr = NULL;
   char *tempstr = NULL;
   int size = 0;
   int size = 0;
@@ -276,8 +283,7 @@ char ** get_values(const char * key) {
       toPass[size++] = tempTok;
       toPass[size++] = tempTok;
       if(size == toPassSize) {
       if(size == toPassSize) {
         toPassSize += MAX_SIZE;
         toPassSize += MAX_SIZE;
-        toPass = (char **) realloc(toPass,(sizeof(char *) *
-                                           (MAX_SIZE * toPassSize)));
+        toPass = (char **) realloc(toPass,(sizeof(char *) * toPassSize));
       }
       }
       tempTok = strtok_r(NULL, ",", &tempstr);
       tempTok = strtok_r(NULL, ",", &tempstr);
     }
     }

+ 3 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h

@@ -34,6 +34,9 @@ char *get_value(const char* key);
 //comma seperated strings.
 //comma seperated strings.
 char ** get_values(const char* key);
 char ** get_values(const char* key);
 
 
+// Extracts array of values from the comma separated list of values.
+char ** extract_values(char *value);
+
 // free the memory returned by get_values
 // free the memory returned by get_values
 void free_values(char** values);
 void free_values(char** values);
 
 

+ 22 - 53
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c

@@ -357,7 +357,7 @@ int mkdirs(const char* path, mode_t perm) {
  * It creates the container work and log directories.
  * It creates the container work and log directories.
  */
  */
 static int create_container_directories(const char* user, const char *app_id, 
 static int create_container_directories(const char* user, const char *app_id, 
-					const char *container_id) {
+    const char *container_id, char* const* local_dir, char* const* log_dir) {
   // create dirs as 0750
   // create dirs as 0750
   const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP;
   const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP;
   if (app_id == NULL || container_id == NULL || user == NULL) {
   if (app_id == NULL || container_id == NULL || user == NULL) {
@@ -367,20 +367,11 @@ static int create_container_directories(const char* user, const char *app_id,
   }
   }
 
 
   int result = -1;
   int result = -1;
-
-  char **local_dir = get_values(NM_SYS_DIR_KEY);
-
-  if (local_dir == NULL) {
-    fprintf(LOGFILE, "%s is not configured.\n", NM_SYS_DIR_KEY);
-    return -1;
-  }
-
-  char **local_dir_ptr;
+  char* const* local_dir_ptr;
   for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) {
   for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) {
     char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, 
     char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, 
                                                 container_id);
                                                 container_id);
     if (container_dir == NULL) {
     if (container_dir == NULL) {
-      free_values(local_dir);
       return -1;
       return -1;
     }
     }
     if (mkdirs(container_dir, perms) == 0) {
     if (mkdirs(container_dir, perms) == 0) {
@@ -390,7 +381,6 @@ static int create_container_directories(const char* user, const char *app_id,
     free(container_dir);
     free(container_dir);
 
 
   }
   }
-  free_values(local_dir);
   if (result != 0) {
   if (result != 0) {
     return result;
     return result;
   }
   }
@@ -404,19 +394,11 @@ static int create_container_directories(const char* user, const char *app_id,
   } else {
   } else {
     sprintf(combined_name, "%s/%s", app_id, container_id);
     sprintf(combined_name, "%s/%s", app_id, container_id);
 
 
-    char **log_dir = get_values(NM_LOG_DIR_KEY);
-    if (log_dir == NULL) {
-      free(combined_name);
-      fprintf(LOGFILE, "%s is not configured.\n", NM_LOG_DIR_KEY);
-      return -1;
-    }
-
-    char **log_dir_ptr;
+    char* const* log_dir_ptr;
     for(log_dir_ptr = log_dir; *log_dir_ptr != NULL; ++log_dir_ptr) {
     for(log_dir_ptr = log_dir; *log_dir_ptr != NULL; ++log_dir_ptr) {
       char *container_log_dir = get_app_log_directory(*log_dir_ptr, combined_name);
       char *container_log_dir = get_app_log_directory(*log_dir_ptr, combined_name);
       if (container_log_dir == NULL) {
       if (container_log_dir == NULL) {
         free(combined_name);
         free(combined_name);
-        free_values(log_dir);
         return -1;
         return -1;
       } else if (mkdirs(container_log_dir, perms) != 0) {
       } else if (mkdirs(container_log_dir, perms) != 0) {
     	free(container_log_dir);
     	free(container_log_dir);
@@ -426,7 +408,6 @@ static int create_container_directories(const char* user, const char *app_id,
       }
       }
     }
     }
     free(combined_name);
     free(combined_name);
-    free_values(log_dir);
   }
   }
   return result;
   return result;
 }
 }
@@ -660,17 +641,12 @@ static int copy_file(int input, const char* in_filename,
 /**
 /**
  * Function to initialize the user directories of a user.
  * Function to initialize the user directories of a user.
  */
  */
-int initialize_user(const char *user) {
-  char **local_dir = get_values(NM_SYS_DIR_KEY);
-  if (local_dir == NULL) {
-    fprintf(LOGFILE, "%s is not configured.\n", NM_SYS_DIR_KEY);
-    return INVALID_NM_ROOT_DIRS;
-  }
+int initialize_user(const char *user, char* const* local_dirs) {
 
 
   char *user_dir;
   char *user_dir;
-  char **local_dir_ptr = local_dir;
+  char* const* local_dir_ptr;
   int failed = 0;
   int failed = 0;
-  for(local_dir_ptr = local_dir; *local_dir_ptr != 0; ++local_dir_ptr) {
+  for(local_dir_ptr = local_dirs; *local_dir_ptr != 0; ++local_dir_ptr) {
     user_dir = get_user_directory(*local_dir_ptr, user);
     user_dir = get_user_directory(*local_dir_ptr, user);
     if (user_dir == NULL) {
     if (user_dir == NULL) {
       fprintf(LOGFILE, "Couldn't get userdir directory for %s.\n", user);
       fprintf(LOGFILE, "Couldn't get userdir directory for %s.\n", user);
@@ -682,32 +658,29 @@ int initialize_user(const char *user) {
     }
     }
     free(user_dir);
     free(user_dir);
   }
   }
-  free_values(local_dir);
   return failed ? INITIALIZE_USER_FAILED : 0;
   return failed ? INITIALIZE_USER_FAILED : 0;
 }
 }
 
 
 /**
 /**
  * Function to prepare the application directories for the container.
  * Function to prepare the application directories for the container.
  */
  */
-int initialize_app(const char *user, const char *app_id, 
-		   const char* nmPrivate_credentials_file, char* const* args) {
+int initialize_app(const char *user, const char *app_id,
+                   const char* nmPrivate_credentials_file,
+                   char* const* local_dirs, char* const* log_roots,
+                   char* const* args) {
   if (app_id == NULL || user == NULL) {
   if (app_id == NULL || user == NULL) {
     fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n");
     fprintf(LOGFILE, "Either app_id is null or the user passed is null.\n");
     return INVALID_ARGUMENT_NUMBER;
     return INVALID_ARGUMENT_NUMBER;
   }
   }
 
 
   // create the user directory on all disks
   // create the user directory on all disks
-  int result = initialize_user(user);
+  int result = initialize_user(user, local_dirs);
   if (result != 0) {
   if (result != 0) {
     return result;
     return result;
   }
   }
 
 
   ////////////// create the log directories for the app on all disks
   ////////////// create the log directories for the app on all disks
-  char **log_roots = get_values(NM_LOG_DIR_KEY);
-  if (log_roots == NULL) {
-    return INVALID_CONFIG_FILE;
-  }
-  char **log_root;
+  char* const* log_root;
   char *any_one_app_log_dir = NULL;
   char *any_one_app_log_dir = NULL;
   for(log_root=log_roots; *log_root != NULL; ++log_root) {
   for(log_root=log_roots; *log_root != NULL; ++log_root) {
     char *app_log_dir = get_app_log_directory(*log_root, app_id);
     char *app_log_dir = get_app_log_directory(*log_root, app_id);
@@ -722,7 +695,7 @@ int initialize_app(const char *user, const char *app_id,
       free(app_log_dir);
       free(app_log_dir);
     }
     }
   }
   }
-  free_values(log_roots);
+
   if (any_one_app_log_dir == NULL) {
   if (any_one_app_log_dir == NULL) {
     fprintf(LOGFILE, "Did not create any app-log directories\n");
     fprintf(LOGFILE, "Did not create any app-log directories\n");
     return -1;
     return -1;
@@ -743,15 +716,9 @@ int initialize_app(const char *user, const char *app_id,
 
 
   // 750
   // 750
   mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP;
   mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP;
-  char **nm_roots = get_values(NM_SYS_DIR_KEY);
-
-  if (nm_roots == NULL) {
-    return INVALID_CONFIG_FILE;
-  }
-
-  char **nm_root;
+  char* const* nm_root;
   char *primary_app_dir = NULL;
   char *primary_app_dir = NULL;
-  for(nm_root=nm_roots; *nm_root != NULL; ++nm_root) {
+  for(nm_root=local_dirs; *nm_root != NULL; ++nm_root) {
     char *app_dir = get_app_directory(*nm_root, user, app_id);
     char *app_dir = get_app_directory(*nm_root, user, app_id);
     if (app_dir == NULL) {
     if (app_dir == NULL) {
       // try the next one
       // try the next one
@@ -763,7 +730,7 @@ int initialize_app(const char *user, const char *app_id,
       free(app_dir);
       free(app_dir);
     }
     }
   }
   }
-  free_values(nm_roots);
+
   if (primary_app_dir == NULL) {
   if (primary_app_dir == NULL) {
     fprintf(LOGFILE, "Did not create any app directories\n");
     fprintf(LOGFILE, "Did not create any app directories\n");
     return -1;
     return -1;
@@ -805,9 +772,10 @@ int initialize_app(const char *user, const char *app_id,
 }
 }
 
 
 int launch_container_as_user(const char *user, const char *app_id, 
 int launch_container_as_user(const char *user, const char *app_id, 
-                     const char *container_id, const char *work_dir,
-                     const char *script_name, const char *cred_file,
-                     const char* pid_file) {
+                   const char *container_id, const char *work_dir,
+                   const char *script_name, const char *cred_file,
+                   const char* pid_file, char* const* local_dirs,
+                   char* const* log_dirs) {
   int exit_code = -1;
   int exit_code = -1;
   char *script_file_dest = NULL;
   char *script_file_dest = NULL;
   char *cred_file_dest = NULL;
   char *cred_file_dest = NULL;
@@ -854,7 +822,8 @@ int launch_container_as_user(const char *user, const char *app_id,
     goto cleanup;
     goto cleanup;
   }
   }
 
 
-  if (create_container_directories(user, app_id, container_id) != 0) {
+  if (create_container_directories(user, app_id, container_id, local_dirs,
+                                   log_dirs) != 0) {
     fprintf(LOGFILE, "Could not create container dirs");
     fprintf(LOGFILE, "Could not create container dirs");
     goto cleanup;
     goto cleanup;
   }
   }

+ 8 - 6
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h

@@ -61,8 +61,6 @@ enum errorcodes {
 #define NM_APP_DIR_PATTERN USER_DIR_PATTERN "/appcache/%s"
 #define NM_APP_DIR_PATTERN USER_DIR_PATTERN "/appcache/%s"
 #define CONTAINER_DIR_PATTERN NM_APP_DIR_PATTERN "/%s"
 #define CONTAINER_DIR_PATTERN NM_APP_DIR_PATTERN "/%s"
 #define CONTAINER_SCRIPT "launch_container.sh"
 #define CONTAINER_SCRIPT "launch_container.sh"
-#define NM_SYS_DIR_KEY "yarn.nodemanager.local-dirs"
-#define NM_LOG_DIR_KEY "yarn.nodemanager.log-dirs"
 #define CREDENTIALS_FILENAME "container_tokens"
 #define CREDENTIALS_FILENAME "container_tokens"
 #define MIN_USERID_KEY "min.user.id"
 #define MIN_USERID_KEY "min.user.id"
 #define BANNED_USERS_KEY "banned.users"
 #define BANNED_USERS_KEY "banned.users"
@@ -92,12 +90,13 @@ int check_executor_permissions(char *executable_file);
 
 
 // initialize the application directory
 // initialize the application directory
 int initialize_app(const char *user, const char *app_id,
 int initialize_app(const char *user, const char *app_id,
-                   const char *credentials, char* const* args);
+                   const char *credentials, char* const* local_dirs,
+                   char* const* log_dirs, char* const* args);
 
 
 /*
 /*
  * Function used to launch a container as the provided user. It does the following :
  * Function used to launch a container as the provided user. It does the following :
  * 1) Creates container work dir and log dir to be accessible by the child
  * 1) Creates container work dir and log dir to be accessible by the child
- * 2) Copies the script file from the TT to the work directory
+ * 2) Copies the script file from the NM to the work directory
  * 3) Sets up the environment
  * 3) Sets up the environment
  * 4) Does an execlp on the same in order to replace the current image with
  * 4) Does an execlp on the same in order to replace the current image with
  *    container image.
  *    container image.
@@ -109,12 +108,15 @@ int initialize_app(const char *user, const char *app_id,
  * @param cred_file the credentials file that needs to be compied to the
  * @param cred_file the credentials file that needs to be compied to the
  * working directory.
  * working directory.
  * @param pid_file file where pid of process should be written to
  * @param pid_file file where pid of process should be written to
+ * @param local_dirs nodemanager-local-directories to be used
+ * @param log_dirs nodemanager-log-directories to be used
  * @return -1 or errorcode enum value on error (should never return on success).
  * @return -1 or errorcode enum value on error (should never return on success).
  */
  */
 int launch_container_as_user(const char * user, const char *app_id,
 int launch_container_as_user(const char * user, const char *app_id,
                      const char *container_id, const char *work_dir,
                      const char *container_id, const char *work_dir,
                      const char *script_name, const char *cred_file,
                      const char *script_name, const char *cred_file,
-                     const char *pid_file);
+                     const char *pid_file, char* const* local_dirs,
+                     char* const* log_dirs);
 
 
 /**
 /**
  * Function used to signal a container launched by the user.
  * Function used to signal a container launched by the user.
@@ -181,7 +183,7 @@ int mkdirs(const char* path, mode_t perm);
 /**
 /**
  * Function to initialize the user directories of a user.
  * Function to initialize the user directories of a user.
  */
  */
-int initialize_user(const char *user);
+int initialize_user(const char *user, char* const* local_dirs);
 
 
 /**
 /**
  * Create a top level directory for the user.
  * Create a top level directory for the user.

+ 22 - 13
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c

@@ -43,10 +43,11 @@ void display_usage(FILE *stream) {
   fprintf(stream,
   fprintf(stream,
       "Usage: container-executor user command command-args\n");
       "Usage: container-executor user command command-args\n");
   fprintf(stream, "Commands:\n");
   fprintf(stream, "Commands:\n");
-  fprintf(stream, "   initialize container: %2d appid tokens cmd app...\n",
-	  INITIALIZE_CONTAINER);
+  fprintf(stream, "   initialize container: %2d appid tokens " \
+   "nm-local-dirs nm-log-dirs cmd app...\n", INITIALIZE_CONTAINER);
   fprintf(stream,
   fprintf(stream,
-      "   launch container:    %2d appid containerid workdir container-script tokens pidfile\n",
+      "   launch container:    %2d appid containerid workdir "\
+      "container-script tokens pidfile nm-local-dirs nm-log-dirs\n",
 	  LAUNCH_CONTAINER);
 	  LAUNCH_CONTAINER);
   fprintf(stream, "   signal container:    %2d container-pid signal\n",
   fprintf(stream, "   signal container:    %2d container-pid signal\n",
 	  SIGNAL_CONTAINER);
 	  SIGNAL_CONTAINER);
@@ -96,6 +97,7 @@ int main(int argc, char **argv) {
 
 
   char *orig_conf_file = STRINGIFY(HADOOP_CONF_DIR) "/" CONF_FILENAME;
   char *orig_conf_file = STRINGIFY(HADOOP_CONF_DIR) "/" CONF_FILENAME;
   char *conf_file = realpath(orig_conf_file, NULL);
   char *conf_file = realpath(orig_conf_file, NULL);
+  char *local_dirs, *log_dirs;
 
 
   if (conf_file == NULL) {
   if (conf_file == NULL) {
     fprintf(ERRORFILE, "Configuration file %s not found.\n", orig_conf_file);
     fprintf(ERRORFILE, "Configuration file %s not found.\n", orig_conf_file);
@@ -158,20 +160,23 @@ int main(int argc, char **argv) {
 
 
   switch (command) {
   switch (command) {
   case INITIALIZE_CONTAINER:
   case INITIALIZE_CONTAINER:
-    if (argc < 6) {
-      fprintf(ERRORFILE, "Too few arguments (%d vs 6) for initialize container\n",
+    if (argc < 8) {
+      fprintf(ERRORFILE, "Too few arguments (%d vs 8) for initialize container\n",
 	      argc);
 	      argc);
       fflush(ERRORFILE);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
       return INVALID_ARGUMENT_NUMBER;
     }
     }
     app_id = argv[optind++];
     app_id = argv[optind++];
     cred_file = argv[optind++];
     cred_file = argv[optind++];
+    local_dirs = argv[optind++];// good local dirs as a comma separated list
+    log_dirs = argv[optind++];// good log dirs as a comma separated list
     exit_code = initialize_app(user_detail->pw_name, app_id, cred_file,
     exit_code = initialize_app(user_detail->pw_name, app_id, cred_file,
-                               argv + optind);
+                               extract_values(local_dirs),
+                               extract_values(log_dirs), argv + optind);
     break;
     break;
   case LAUNCH_CONTAINER:
   case LAUNCH_CONTAINER:
-    if (argc < 9) {
-      fprintf(ERRORFILE, "Too few arguments (%d vs 9) for launch container\n",
+    if (argc != 11) {
+      fprintf(ERRORFILE, "Too few arguments (%d vs 11) for launch container\n",
 	      argc);
 	      argc);
       fflush(ERRORFILE);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
       return INVALID_ARGUMENT_NUMBER;
@@ -182,13 +187,17 @@ int main(int argc, char **argv) {
     script_file = argv[optind++];
     script_file = argv[optind++];
     cred_file = argv[optind++];
     cred_file = argv[optind++];
     pid_file = argv[optind++];
     pid_file = argv[optind++];
-    exit_code = launch_container_as_user(user_detail->pw_name, app_id, container_id,
-                                 current_dir, script_file, cred_file, pid_file);
+    local_dirs = argv[optind++];// good local dirs as a comma separated list
+    log_dirs = argv[optind++];// good log dirs as a comma separated list
+    exit_code = launch_container_as_user(user_detail->pw_name, app_id,
+                    container_id, current_dir, script_file, cred_file,
+                    pid_file, extract_values(local_dirs),
+                    extract_values(log_dirs));
     break;
     break;
   case SIGNAL_CONTAINER:
   case SIGNAL_CONTAINER:
-    if (argc < 5) {
-      fprintf(ERRORFILE, "Too few arguments (%d vs 5) for signal container\n",
-	      argc);
+    if (argc != 5) {
+      fprintf(ERRORFILE, "Wrong number of arguments (%d vs 5) for " \
+          "signal container\n", argc);
       fflush(ERRORFILE);
       fflush(ERRORFILE);
       return INVALID_ARGUMENT_NUMBER;
       return INVALID_ARGUMENT_NUMBER;
     } else {
     } else {

+ 24 - 17
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c

@@ -28,10 +28,17 @@
 #include <sys/stat.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/wait.h>
 
 
-#define TEST_ROOT "/tmp/test-container-controller"
+#define TEST_ROOT "/tmp/test-container-executor"
 #define DONT_TOUCH_FILE "dont-touch-me"
 #define DONT_TOUCH_FILE "dont-touch-me"
+#define NM_LOCAL_DIRS       TEST_ROOT "/local-1," TEST_ROOT "/local-2," \
+               TEST_ROOT "/local-3," TEST_ROOT "/local-4," TEST_ROOT "/local-5"
+#define NM_LOG_DIRS         TEST_ROOT "/logdir_1," TEST_ROOT "/logdir_2," \
+                            TEST_ROOT "/logdir_3," TEST_ROOT "/logdir_4"
+#define ARRAY_SIZE 1000
 
 
 static char* username = NULL;
 static char* username = NULL;
+static char* local_dirs = NULL;
+static char* log_dirs = NULL;
 
 
 /**
 /**
  * Run the command using the effective user id.
  * Run the command using the effective user id.
@@ -84,40 +91,33 @@ void run(const char *cmd) {
 
 
 int write_config_file(char *file_name) {
 int write_config_file(char *file_name) {
   FILE *file;
   FILE *file;
-  int i = 0;
   file = fopen(file_name, "w");
   file = fopen(file_name, "w");
   if (file == NULL) {
   if (file == NULL) {
     printf("Failed to open %s.\n", file_name);
     printf("Failed to open %s.\n", file_name);
     return EXIT_FAILURE;
     return EXIT_FAILURE;
   }
   }
-  fprintf(file, "yarn.nodemanager.local-dirs=" TEST_ROOT "/local-1");
-  for(i=2; i < 5; ++i) {
-    fprintf(file, "," TEST_ROOT "/local-%d", i);
-  }
-  fprintf(file, "\n");
-  fprintf(file, "yarn.nodemanager.log-dirs=" TEST_ROOT "/logs\n");
+  fprintf(file, "banned.users=bannedUser\n");
+  fprintf(file, "min.user.id=1000\n");
   fclose(file);
   fclose(file);
   return 0;
   return 0;
 }
 }
 
 
-void create_nm_roots() {
-  char** nm_roots = get_values(NM_SYS_DIR_KEY);
+void create_nm_roots(char ** nm_roots) {
   char** nm_root;
   char** nm_root;
   for(nm_root=nm_roots; *nm_root != NULL; ++nm_root) {
   for(nm_root=nm_roots; *nm_root != NULL; ++nm_root) {
     if (mkdir(*nm_root, 0755) != 0) {
     if (mkdir(*nm_root, 0755) != 0) {
       printf("FAIL: Can't create directory %s - %s\n", *nm_root,
       printf("FAIL: Can't create directory %s - %s\n", *nm_root,
-	     strerror(errno));
+             strerror(errno));
       exit(1);
       exit(1);
     }
     }
     char buffer[100000];
     char buffer[100000];
     sprintf(buffer, "%s/usercache", *nm_root);
     sprintf(buffer, "%s/usercache", *nm_root);
     if (mkdir(buffer, 0755) != 0) {
     if (mkdir(buffer, 0755) != 0) {
       printf("FAIL: Can't create directory %s - %s\n", buffer,
       printf("FAIL: Can't create directory %s - %s\n", buffer,
-	     strerror(errno));
+             strerror(errno));
       exit(1);
       exit(1);
     }
     }
   }
   }
-  free_values(nm_roots);
 }
 }
 
 
 void test_get_user_directory() {
 void test_get_user_directory() {
@@ -209,7 +209,7 @@ void test_check_configuration_permissions() {
 }
 }
 
 
 void test_delete_container() {
 void test_delete_container() {
-  if (initialize_user(username)) {
+  if (initialize_user(username, extract_values(local_dirs))) {
     printf("FAIL: failed to initialize user %s\n", username);
     printf("FAIL: failed to initialize user %s\n", username);
     exit(1);
     exit(1);
   }
   }
@@ -504,7 +504,8 @@ void test_init_app() {
     exit(1);
     exit(1);
   } else if (child == 0) {
   } else if (child == 0) {
     char *final_pgm[] = {"touch", "my-touch-file", 0};
     char *final_pgm[] = {"touch", "my-touch-file", 0};
-    if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", final_pgm) != 0) {
+    if (initialize_app(username, "app_4", TEST_ROOT "/creds.txt", final_pgm,
+        extract_values(local_dirs), extract_values(log_dirs)) != 0) {
       printf("FAIL: failed in child\n");
       printf("FAIL: failed in child\n");
       exit(42);
       exit(42);
     }
     }
@@ -598,7 +599,8 @@ void test_run_container() {
     exit(1);
     exit(1);
   } else if (child == 0) {
   } else if (child == 0) {
     if (launch_container_as_user(username, "app_4", "container_1", 
     if (launch_container_as_user(username, "app_4", "container_1", 
-                         container_dir, script_name, TEST_ROOT "/creds.txt", pid_file) != 0) {
+          container_dir, script_name, TEST_ROOT "/creds.txt", pid_file,
+          extract_values(local_dirs), extract_values(log_dirs)) != 0) {
       printf("FAIL: failed in child\n");
       printf("FAIL: failed in child\n");
       exit(42);
       exit(42);
     }
     }
@@ -677,7 +679,12 @@ int main(int argc, char **argv) {
   }
   }
   read_config(TEST_ROOT "/test.cfg");
   read_config(TEST_ROOT "/test.cfg");
 
 
-  create_nm_roots();
+  local_dirs = (char *) malloc (sizeof(char) * ARRAY_SIZE);
+  strcpy(local_dirs, NM_LOCAL_DIRS);
+  log_dirs = (char *) malloc (sizeof(char) * ARRAY_SIZE);
+  strcpy(log_dirs, NM_LOG_DIRS);
+
+  create_nm_roots(extract_values(local_dirs));
 
 
   if (getuid() == 0 && argc == 2) {
   if (getuid() == 0 && argc == 2) {
     username = argv[1];
     username = argv[1];

+ 11 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java

@@ -60,16 +60,18 @@ public class DummyContainerManager extends ContainerManagerImpl {
       DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
       DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater,
       NodeManagerMetrics metrics,
       NodeManagerMetrics metrics,
       ContainerTokenSecretManager containerTokenSecretManager,
       ContainerTokenSecretManager containerTokenSecretManager,
-      ApplicationACLsManager applicationACLsManager) {
+      ApplicationACLsManager applicationACLsManager,
+      LocalDirsHandlerService dirsHandler) {
     super(context, exec, deletionContext, nodeStatusUpdater, metrics,
     super(context, exec, deletionContext, nodeStatusUpdater, metrics,
-        containerTokenSecretManager, applicationACLsManager);
+        containerTokenSecretManager, applicationACLsManager, dirsHandler);
   }
   }
 
 
   @Override
   @Override
   @SuppressWarnings("unchecked")
   @SuppressWarnings("unchecked")
-  protected ResourceLocalizationService createResourceLocalizationService(ContainerExecutor exec,
-      DeletionService deletionContext) {
-    return new ResourceLocalizationService(super.dispatcher, exec, deletionContext) {
+  protected ResourceLocalizationService createResourceLocalizationService(
+      ContainerExecutor exec, DeletionService deletionContext) {
+    return new ResourceLocalizationService(super.dispatcher, exec,
+        deletionContext, super.dirsHandler) {
       @Override
       @Override
       public void handle(LocalizationEvent event) {
       public void handle(LocalizationEvent event) {
         switch (event.getType()) {
         switch (event.getType()) {
@@ -125,7 +127,8 @@ public class DummyContainerManager extends ContainerManagerImpl {
   @SuppressWarnings("unchecked")
   @SuppressWarnings("unchecked")
   protected ContainersLauncher createContainersLauncher(Context context,
   protected ContainersLauncher createContainersLauncher(Context context,
       ContainerExecutor exec) {
       ContainerExecutor exec) {
-    return new ContainersLauncher(context, super.dispatcher, exec) {
+    return new ContainersLauncher(context, super.dispatcher, exec,
+                                  super.dirsHandler) {
       @Override
       @Override
       public void handle(ContainersLauncherEvent event) {
       public void handle(ContainersLauncherEvent event) {
         Container container = event.getContainer();
         Container container = event.getContainer();
@@ -139,7 +142,8 @@ public class DummyContainerManager extends ContainerManagerImpl {
         case CLEANUP_CONTAINER:
         case CLEANUP_CONTAINER:
           dispatcher.getEventHandler().handle(
           dispatcher.getEventHandler().handle(
               new ContainerExitEvent(containerId,
               new ContainerExitEvent(containerId,
-                  ContainerEventType.CONTAINER_KILLED_ON_REQUEST, 0));
+                  ContainerEventType.CONTAINER_KILLED_ON_REQUEST, 0,
+                  "Container exited with exit code 0."));
           break;
           break;
         }
         }
       }
       }

+ 6 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.server.nodemanager;
 import java.io.File;
 import java.io.File;
 import java.io.IOException;
 import java.io.IOException;
 
 
-import org.apache.hadoop.NodeHealthCheckerService;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
@@ -80,9 +79,12 @@ public class TestEventFlow {
 
 
     ContainerExecutor exec = new DefaultContainerExecutor();
     ContainerExecutor exec = new DefaultContainerExecutor();
     exec.setConf(conf);
     exec.setConf(conf);
+
     DeletionService del = new DeletionService(exec);
     DeletionService del = new DeletionService(exec);
     Dispatcher dispatcher = new AsyncDispatcher();
     Dispatcher dispatcher = new AsyncDispatcher();
-    NodeHealthCheckerService healthChecker = null;
+    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    healthChecker.init(conf);
+    LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
     NodeManagerMetrics metrics = NodeManagerMetrics.create();
     NodeManagerMetrics metrics = NodeManagerMetrics.create();
     ContainerTokenSecretManager containerTokenSecretManager =  new ContainerTokenSecretManager();
     ContainerTokenSecretManager containerTokenSecretManager =  new ContainerTokenSecretManager();
     NodeStatusUpdater nodeStatusUpdater =
     NodeStatusUpdater nodeStatusUpdater =
@@ -100,7 +102,8 @@ public class TestEventFlow {
 
 
     DummyContainerManager containerManager = new DummyContainerManager(
     DummyContainerManager containerManager = new DummyContainerManager(
         context, exec, del, nodeStatusUpdater, metrics,
         context, exec, del, nodeStatusUpdater, metrics,
-        containerTokenSecretManager, new ApplicationACLsManager(conf));
+        containerTokenSecretManager, new ApplicationACLsManager(conf),
+        dirsHandler);
     containerManager.init(conf);
     containerManager.init(conf);
     containerManager.start();
     containerManager.start();
 
 

+ 18 - 6
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -63,8 +64,6 @@ import org.junit.Test;
  * config values.
  * config values.
  * <br><pre><code>
  * <br><pre><code>
  * > cat /etc/hadoop/container-executor.cfg
  * > cat /etc/hadoop/container-executor.cfg
- * yarn.nodemanager.local-dirs=/tmp/hadoop/nm-local/
- * yarn.nodemanager.log-dirs=/tmp/hadoop/nm-log
  * yarn.nodemanager.linux-container-executor.group=mapred
  * yarn.nodemanager.linux-container-executor.group=mapred
  * #depending on the user id of the application.submitter option
  * #depending on the user id of the application.submitter option
  * min.user.id=1
  * min.user.id=1
@@ -72,7 +71,7 @@ import org.junit.Test;
  * > sudo chmod 444 /etc/hadoop/container-executor.cfg
  * > sudo chmod 444 /etc/hadoop/container-executor.cfg
  * </code></pre>
  * </code></pre>
  * 
  * 
- * <li>iMove the binary and set proper permissions on it. It needs to be owned 
+ * <li>Move the binary and set proper permissions on it. It needs to be owned 
  * by root, the group needs to be the group configured in container-executor.cfg, 
  * by root, the group needs to be the group configured in container-executor.cfg, 
  * and it needs the setuid bit set. (The build will also overwrite it so you
  * and it needs the setuid bit set. (The build will also overwrite it so you
  * need to move it to a place that you can support it. 
  * need to move it to a place that you can support it. 
@@ -98,14 +97,22 @@ public class TestLinuxContainerExecutor {
   
   
   private LinuxContainerExecutor exec = null;
   private LinuxContainerExecutor exec = null;
   private String appSubmitter = null;
   private String appSubmitter = null;
+  private LocalDirsHandlerService dirsHandler;
 
 
   @Before
   @Before
   public void setup() throws Exception {
   public void setup() throws Exception {
-    FileContext.getLocalFSFileContext().mkdir(
-        new Path(workSpace.getAbsolutePath()), null, true);
+    FileContext files = FileContext.getLocalFSFileContext();
+    Path workSpacePath = new Path(workSpace.getAbsolutePath());
+    files.mkdir(workSpacePath, null, true);
     workSpace.setReadable(true, false);
     workSpace.setReadable(true, false);
     workSpace.setExecutable(true, false);
     workSpace.setExecutable(true, false);
     workSpace.setWritable(true, false);
     workSpace.setWritable(true, false);
+    File localDir = new File(workSpace.getAbsoluteFile(), "localDir");
+    files.mkdir(new Path(localDir.getAbsolutePath()),
+        new FsPermission("777"), false);
+    File logDir = new File(workSpace.getAbsoluteFile(), "logDir");
+    files.mkdir(new Path(logDir.getAbsolutePath()),
+        new FsPermission("777"), false);
     String exec_path = System.getProperty("container-executor.path");
     String exec_path = System.getProperty("container-executor.path");
     if(exec_path != null && !exec_path.isEmpty()) {
     if(exec_path != null && !exec_path.isEmpty()) {
       Configuration conf = new Configuration(false);
       Configuration conf = new Configuration(false);
@@ -114,6 +121,10 @@ public class TestLinuxContainerExecutor {
       conf.set(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH, exec_path);
       conf.set(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH, exec_path);
       exec = new LinuxContainerExecutor();
       exec = new LinuxContainerExecutor();
       exec.setConf(conf);
       exec.setConf(conf);
+      conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
+      conf.set(YarnConfiguration.NM_LOG_DIRS, logDir.getAbsolutePath());
+      dirsHandler = new LocalDirsHandlerService();
+      dirsHandler.init(conf);
     }
     }
     appSubmitter = System.getProperty("application.submitter");
     appSubmitter = System.getProperty("application.submitter");
     if(appSubmitter == null || appSubmitter.isEmpty()) {
     if(appSubmitter == null || appSubmitter.isEmpty()) {
@@ -189,7 +200,8 @@ public class TestLinuxContainerExecutor {
 
 
     exec.activateContainer(cId, pidFile);
     exec.activateContainer(cId, pidFile);
     return exec.launchContainer(container, scriptPath, tokensPath,
     return exec.launchContainer(container, scriptPath, tokensPath,
-        appSubmitter, appId, workDir);
+        appSubmitter, appId, workDir, dirsHandler.getLocalDirs(),
+        dirsHandler.getLogDirs());
   }
   }
   
   
   
   

+ 9 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java

@@ -35,6 +35,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -51,6 +52,7 @@ public class TestLinuxContainerExecutorWithMocks {
 
 
   private LinuxContainerExecutor mockExec = null;
   private LinuxContainerExecutor mockExec = null;
   private final File mockParamFile = new File("./params.txt");
   private final File mockParamFile = new File("./params.txt");
+  private LocalDirsHandlerService dirsHandler;
   
   
   private void deleteMockParamFile() {
   private void deleteMockParamFile() {
     if(mockParamFile.exists()) {
     if(mockParamFile.exists()) {
@@ -80,6 +82,8 @@ public class TestLinuxContainerExecutorWithMocks {
     Configuration conf = new Configuration();
     Configuration conf = new Configuration();
     conf.set(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH, executorPath);
     conf.set(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH, executorPath);
     mockExec = new LinuxContainerExecutor();
     mockExec = new LinuxContainerExecutor();
+    dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
     mockExec.setConf(conf);
     mockExec.setConf(conf);
   }
   }
 
 
@@ -114,10 +118,13 @@ public class TestLinuxContainerExecutorWithMocks {
 
 
     mockExec.activateContainer(cId, pidFile);
     mockExec.activateContainer(cId, pidFile);
     int ret = mockExec.launchContainer(container, scriptPath, tokensPath,
     int ret = mockExec.launchContainer(container, scriptPath, tokensPath,
-        appSubmitter, appId, workDir);
+        appSubmitter, appId, workDir, dirsHandler.getLocalDirs(),
+        dirsHandler.getLogDirs());
     assertEquals(0, ret);
     assertEquals(0, ret);
     assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId,
     assertEquals(Arrays.asList(appSubmitter, cmd, appId, containerId,
-        workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString()),
+        workDir.toString(), "/bin/echo", "/dev/null", pidFile.toString(),
+        StringUtils.join(",", dirsHandler.getLocalDirs()),
+        StringUtils.join(",", dirsHandler.getLogDirs())),
         readMockParams());
         readMockParams());
   }
   }
 
 

+ 47 - 27
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/TestNodeHealthService.java → hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java

@@ -16,7 +16,7 @@
  * limitations under the License.
  * limitations under the License.
  */
  */
 
 
-package org.apache.hadoop;
+package org.apache.hadoop.yarn.server.nodemanager;
 
 
 import java.io.File;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.FileOutputStream;
@@ -88,24 +88,31 @@ public class TestNodeHealthService {
   public void testNodeHealthScriptShouldRun() throws IOException {
   public void testNodeHealthScriptShouldRun() throws IOException {
     // Node health script should not start if there is no property called
     // Node health script should not start if there is no property called
     // node health script path.
     // node health script path.
-    Assert.assertFalse("By default Health checker should not have started",
-        NodeHealthCheckerService.shouldRun(new Configuration()));
+    Assert.assertFalse("By default Health script should not have started",
+        NodeHealthScriptRunner.shouldRun(new Configuration()));
     Configuration conf = getConfForNodeHealthScript();
     Configuration conf = getConfForNodeHealthScript();
     // Node health script should not start if the node health script does not
     // Node health script should not start if the node health script does not
     // exists
     // exists
-    Assert.assertFalse("Node health script should start", NodeHealthCheckerService
-        .shouldRun(conf));
+    Assert.assertFalse("Node health script should start",
+        NodeHealthScriptRunner.shouldRun(conf));
     // Create script path.
     // Create script path.
     conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
     conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
     conf.addResource(nodeHealthConfigFile.getName());
     conf.addResource(nodeHealthConfigFile.getName());
     writeNodeHealthScriptFile("", false);
     writeNodeHealthScriptFile("", false);
     // Node health script should not start if the node health script is not
     // Node health script should not start if the node health script is not
     // executable.
     // executable.
-    Assert.assertFalse("Node health script should start", NodeHealthCheckerService
-        .shouldRun(conf));
+    Assert.assertFalse("Node health script should start",
+        NodeHealthScriptRunner.shouldRun(conf));
     writeNodeHealthScriptFile("", true);
     writeNodeHealthScriptFile("", true);
-    Assert.assertTrue("Node health script should start", NodeHealthCheckerService
-        .shouldRun(conf));
+    Assert.assertTrue("Node health script should start",
+        NodeHealthScriptRunner.shouldRun(conf));
+  }
+
+  private void setHealthStatus(NodeHealthStatus healthStatus, boolean isHealthy,
+      String healthReport, long lastHealthReportTime) {
+    healthStatus.setHealthReport(healthReport);
+    healthStatus.setIsNodeHealthy(isHealthy);
+    healthStatus.setLastHealthReportTime(lastHealthReportTime);
   }
   }
 
 
   @Test
   @Test
@@ -120,54 +127,67 @@ public class TestNodeHealthService {
     conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
     conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
     conf.addResource(nodeHealthConfigFile.getName());
     conf.addResource(nodeHealthConfigFile.getName());
 
 
-    NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(
-        conf);
-    TimerTask timer = nodeHealthChecker.getTimer();
     writeNodeHealthScriptFile(normalScript, true);
     writeNodeHealthScriptFile(normalScript, true);
-    timer.run();
+    NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService();
+    nodeHealthChecker.init(conf);
+    NodeHealthScriptRunner nodeHealthScriptRunner =
+        nodeHealthChecker.getNodeHealthScriptRunner();
+    TimerTask timerTask = nodeHealthScriptRunner.getTimerTask();
 
 
-    nodeHealthChecker.setHealthStatus(healthStatus);
+    timerTask.run();
+
+    setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
+        nodeHealthChecker.getHealthReport(),
+        nodeHealthChecker.getLastHealthReportTime());
     LOG.info("Checking initial healthy condition");
     LOG.info("Checking initial healthy condition");
     // Check proper report conditions.
     // Check proper report conditions.
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
         .getIsNodeHealthy());
         .getIsNodeHealthy());
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
-        .getHealthReport().isEmpty());
+        .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
 
 
     // write out error file.
     // write out error file.
     // Healthy to unhealthy transition
     // Healthy to unhealthy transition
     writeNodeHealthScriptFile(errorScript, true);
     writeNodeHealthScriptFile(errorScript, true);
     // Run timer
     // Run timer
-    timer.run();
+    timerTask.run();
     // update health status
     // update health status
-    nodeHealthChecker.setHealthStatus(healthStatus);
+    setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
+        nodeHealthChecker.getHealthReport(),
+        nodeHealthChecker.getLastHealthReportTime());
     LOG.info("Checking Healthy--->Unhealthy");
     LOG.info("Checking Healthy--->Unhealthy");
     Assert.assertFalse("Node health status reported healthy", healthStatus
     Assert.assertFalse("Node health status reported healthy", healthStatus
         .getIsNodeHealthy());
         .getIsNodeHealthy());
-    Assert.assertFalse("Node health status reported healthy", healthStatus
-        .getHealthReport().isEmpty());
+    Assert.assertTrue("Node health status reported healthy", healthStatus
+        .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
     
     
     // Check unhealthy to healthy transitions.
     // Check unhealthy to healthy transitions.
     writeNodeHealthScriptFile(normalScript, true);
     writeNodeHealthScriptFile(normalScript, true);
-    timer.run();
-    nodeHealthChecker.setHealthStatus(healthStatus);
+    timerTask.run();
+    setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
+        nodeHealthChecker.getHealthReport(),
+        nodeHealthChecker.getLastHealthReportTime());
     LOG.info("Checking UnHealthy--->healthy");
     LOG.info("Checking UnHealthy--->healthy");
     // Check proper report conditions.
     // Check proper report conditions.
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
         .getIsNodeHealthy());
         .getIsNodeHealthy());
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
     Assert.assertTrue("Node health status reported unhealthy", healthStatus
-        .getHealthReport().isEmpty());
+        .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
 
 
     // Healthy to timeout transition.
     // Healthy to timeout transition.
     writeNodeHealthScriptFile(timeOutScript, true);
     writeNodeHealthScriptFile(timeOutScript, true);
-    timer.run();
-    nodeHealthChecker.setHealthStatus(healthStatus);
+    timerTask.run();
+    setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
+        nodeHealthChecker.getHealthReport(),
+        nodeHealthChecker.getLastHealthReportTime());
     LOG.info("Checking Healthy--->timeout");
     LOG.info("Checking Healthy--->timeout");
     Assert.assertFalse("Node health status reported healthy even after timeout",
     Assert.assertFalse("Node health status reported healthy even after timeout",
         healthStatus.getIsNodeHealthy());
         healthStatus.getIsNodeHealthy());
-    Assert.assertEquals("Node time out message not propogated", healthStatus
-        .getHealthReport(),
-        NodeHealthCheckerService.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG);
+    Assert.assertTrue("Node script time out message not propogated",
+        healthStatus.getHealthReport().equals(
+            NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG
+            + NodeHealthCheckerService.SEPARATOR
+            + nodeHealthChecker.getDiskHandler().getDisksHealthReport()));
   }
   }
 
 
 }
 }

+ 3 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java

@@ -29,7 +29,6 @@ import java.util.concurrent.ConcurrentMap;
 
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthCheckerService;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
@@ -440,10 +439,11 @@ public class TestNodeStatusUpdater {
           ContainerExecutor exec, DeletionService del,
           ContainerExecutor exec, DeletionService del,
           NodeStatusUpdater nodeStatusUpdater,
           NodeStatusUpdater nodeStatusUpdater,
           ContainerTokenSecretManager containerTokenSecretManager,
           ContainerTokenSecretManager containerTokenSecretManager,
-          ApplicationACLsManager aclsManager) {
+          ApplicationACLsManager aclsManager,
+          LocalDirsHandlerService diskhandler) {
         return new ContainerManagerImpl(context, exec, del,
         return new ContainerManagerImpl(context, exec, del,
             nodeStatusUpdater, metrics, containerTokenSecretManager,
             nodeStatusUpdater, metrics, containerTokenSecretManager,
-            aclsManager) {
+            aclsManager, diskhandler) {
           @Override
           @Override
           public void start() {
           public void start() {
             // Simulating failure of starting RPC server
             // Simulating failure of starting RPC server

+ 8 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java

@@ -45,7 +45,9 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface;
 import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface;
+import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
@@ -94,6 +96,8 @@ public abstract class BaseContainerManagerTest {
   protected ContainerExecutor exec;
   protected ContainerExecutor exec;
   protected DeletionService delSrvc;
   protected DeletionService delSrvc;
   protected String user = "nobody";
   protected String user = "nobody";
+  protected NodeHealthCheckerService nodeHealthChecker;
+  protected LocalDirsHandlerService dirsHandler;
 
 
   protected NodeStatusUpdater nodeStatusUpdater = new NodeStatusUpdaterImpl(
   protected NodeStatusUpdater nodeStatusUpdater = new NodeStatusUpdaterImpl(
       context, new AsyncDispatcher(), null, metrics, this.containerTokenSecretManager) {
       context, new AsyncDispatcher(), null, metrics, this.containerTokenSecretManager) {
@@ -147,9 +151,12 @@ public abstract class BaseContainerManagerTest {
     delSrvc.init(conf);
     delSrvc.init(conf);
 
 
     exec = createContainerExecutor();
     exec = createContainerExecutor();
+    nodeHealthChecker = new NodeHealthCheckerService();
+    nodeHealthChecker.init(conf);
+    dirsHandler = nodeHealthChecker.getDiskHandler();
     containerManager = new ContainerManagerImpl(context, exec, delSrvc,
     containerManager = new ContainerManagerImpl(context, exec, delSrvc,
         nodeStatusUpdater, metrics, this.containerTokenSecretManager,
         nodeStatusUpdater, metrics, this.containerTokenSecretManager,
-        new ApplicationACLsManager(conf));
+        new ApplicationACLsManager(conf), dirsHandler);
     containerManager.init(conf);
     containerManager.init(conf);
   }
   }
 
 

+ 2 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java

@@ -383,11 +383,12 @@ public class TestContainerManager extends BaseContainerManagerTest {
     // Real del service
     // Real del service
     delSrvc = new DeletionService(exec);
     delSrvc = new DeletionService(exec);
     delSrvc.init(conf);
     delSrvc.init(conf);
+
     ContainerTokenSecretManager containerTokenSecretManager = new 
     ContainerTokenSecretManager containerTokenSecretManager = new 
         ContainerTokenSecretManager();
         ContainerTokenSecretManager();
     containerManager = new ContainerManagerImpl(context, exec, delSrvc,
     containerManager = new ContainerManagerImpl(context, exec, delSrvc,
         nodeStatusUpdater, metrics, containerTokenSecretManager,
         nodeStatusUpdater, metrics, containerTokenSecretManager,
-        new ApplicationACLsManager(conf));
+        new ApplicationACLsManager(conf), dirsHandler);
     containerManager.init(conf);
     containerManager.init(conf);
     containerManager.start();
     containerManager.start();
 
 

+ 6 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java

@@ -25,6 +25,7 @@ import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 import static org.mockito.Mockito.when;
 
 
+import java.io.IOException;
 import java.net.URISyntaxException;
 import java.net.URISyntaxException;
 import java.nio.ByteBuffer;
 import java.nio.ByteBuffer;
 import java.util.AbstractMap.SimpleEntry;
 import java.util.AbstractMap.SimpleEntry;
@@ -649,7 +650,8 @@ public class TestContainer {
 
 
     public void containerFailed(int exitCode) {
     public void containerFailed(int exitCode) {
       c.handle(new ContainerExitEvent(cId,
       c.handle(new ContainerExitEvent(cId,
-          ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, exitCode));
+          ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, exitCode,
+          "Container completed with exit code " + exitCode));
       drainDispatcherEvents();
       drainDispatcherEvents();
     }
     }
 
 
@@ -659,9 +661,10 @@ public class TestContainer {
     }
     }
 
 
     public void containerKilledOnRequest() {
     public void containerKilledOnRequest() {
+      int exitCode = ExitCode.FORCE_KILLED.getExitCode();
       c.handle(new ContainerExitEvent(cId,
       c.handle(new ContainerExitEvent(cId,
-          ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ExitCode.FORCE_KILLED
-              .getExitCode()));
+          ContainerEventType.CONTAINER_KILLED_ON_REQUEST, exitCode,
+          "Container completed with exit code " + exitCode));
       drainDispatcherEvents();
       drainDispatcherEvents();
     }
     }
     
     

+ 27 - 12
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java

@@ -59,6 +59,8 @@ import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse;
 import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse;
@@ -109,19 +111,23 @@ public class TestResourceLocalizationService {
     doNothing().when(spylfs).mkdir(
     doNothing().when(spylfs).mkdir(
         isA(Path.class), isA(FsPermission.class), anyBoolean());
         isA(Path.class), isA(FsPermission.class), anyBoolean());
 
 
+    List<Path> localDirs = new ArrayList<Path>();
+    String[] sDirs = new String[4];
+    for (int i = 0; i < 4; ++i) {
+      localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
+      sDirs[i] = localDirs.get(i).toString();
+    }
+    conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
+    LocalDirsHandlerService diskhandler = new LocalDirsHandlerService();
+    diskhandler.init(conf);
+
     ResourceLocalizationService locService =
     ResourceLocalizationService locService =
-      spy(new ResourceLocalizationService(dispatcher, exec, delService));
+      spy(new ResourceLocalizationService(dispatcher, exec, delService,
+                                          diskhandler));
     doReturn(lfs)
     doReturn(lfs)
       .when(locService).getLocalFileContext(isA(Configuration.class));
       .when(locService).getLocalFileContext(isA(Configuration.class));
     try {
     try {
       dispatcher.start();
       dispatcher.start();
-      List<Path> localDirs = new ArrayList<Path>();
-      String[] sDirs = new String[4];
-      for (int i = 0; i < 4; ++i) {
-        localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
-        sDirs[i] = localDirs.get(i).toString();
-      }
-      conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
 
 
       // initialize ResourceLocalizationService
       // initialize ResourceLocalizationService
       locService.init(conf);
       locService.init(conf);
@@ -176,12 +182,16 @@ public class TestResourceLocalizationService {
     dispatcher.register(LocalizerEventType.class, localizerBus);
     dispatcher.register(LocalizerEventType.class, localizerBus);
 
 
     ContainerExecutor exec = mock(ContainerExecutor.class);
     ContainerExecutor exec = mock(ContainerExecutor.class);
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
+
     DeletionService delService = new DeletionService(exec);
     DeletionService delService = new DeletionService(exec);
     delService.init(null);
     delService.init(null);
     delService.start();
     delService.start();
 
 
     ResourceLocalizationService rawService =
     ResourceLocalizationService rawService =
-      new ResourceLocalizationService(dispatcher, exec, delService);
+      new ResourceLocalizationService(dispatcher, exec, delService,
+                                      dirsHandler);
     ResourceLocalizationService spyService = spy(rawService);
     ResourceLocalizationService spyService = spy(rawService);
     doReturn(ignore).when(spyService).createServer();
     doReturn(ignore).when(spyService).createServer();
     doReturn(mockLocallilzerTracker).when(spyService).createLocalizerTracker(
     doReturn(mockLocallilzerTracker).when(spyService).createLocalizerTracker(
@@ -356,13 +366,17 @@ public class TestResourceLocalizationService {
     dispatcher.register(ContainerEventType.class, containerBus);
     dispatcher.register(ContainerEventType.class, containerBus);
 
 
     ContainerExecutor exec = mock(ContainerExecutor.class);
     ContainerExecutor exec = mock(ContainerExecutor.class);
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
+
     DeletionService delServiceReal = new DeletionService(exec);
     DeletionService delServiceReal = new DeletionService(exec);
     DeletionService delService = spy(delServiceReal);
     DeletionService delService = spy(delServiceReal);
     delService.init(null);
     delService.init(null);
     delService.start();
     delService.start();
 
 
     ResourceLocalizationService rawService =
     ResourceLocalizationService rawService =
-      new ResourceLocalizationService(dispatcher, exec, delService);
+      new ResourceLocalizationService(dispatcher, exec, delService,
+                                      dirsHandler);
     ResourceLocalizationService spyService = spy(rawService);
     ResourceLocalizationService spyService = spy(rawService);
     doReturn(ignore).when(spyService).createServer();
     doReturn(ignore).when(spyService).createServer();
     doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
     doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
@@ -414,8 +428,9 @@ public class TestResourceLocalizationService {
       String appStr = ConverterUtils.toString(appId);
       String appStr = ConverterUtils.toString(appId);
       String ctnrStr = c.getContainerID().toString();
       String ctnrStr = c.getContainerID().toString();
       ArgumentCaptor<Path> tokenPathCaptor = ArgumentCaptor.forClass(Path.class);
       ArgumentCaptor<Path> tokenPathCaptor = ArgumentCaptor.forClass(Path.class);
-      verify(exec).startLocalizer(tokenPathCaptor.capture(), isA(InetSocketAddress.class),
-        eq("user0"), eq(appStr), eq(ctnrStr), isA(List.class));
+      verify(exec).startLocalizer(tokenPathCaptor.capture(),
+          isA(InetSocketAddress.class), eq("user0"), eq(appStr), eq(ctnrStr),
+          isA(List.class), isA(List.class));
       Path localizationTokenPath = tokenPathCaptor.getValue();
       Path localizationTokenPath = tokenPathCaptor.getValue();
 
 
       // heartbeat from localizer
       // heartbeat from localizer

+ 6 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java

@@ -122,7 +122,8 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     
     
     LogAggregationService logAggregationService =
     LogAggregationService logAggregationService =
-        new LogAggregationService(dispatcher, this.context, this.delSrvc);
+        new LogAggregationService(dispatcher, this.context, this.delSrvc,
+                                  super.dirsHandler);
     logAggregationService.init(this.conf);
     logAggregationService.init(this.conf);
     logAggregationService.start();
     logAggregationService.start();
 
 
@@ -189,7 +190,8 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     
     
     LogAggregationService logAggregationService =
     LogAggregationService logAggregationService =
-        new LogAggregationService(dispatcher, this.context, this.delSrvc);
+        new LogAggregationService(dispatcher, this.context, this.delSrvc,
+                                  super.dirsHandler);
     logAggregationService.init(this.conf);
     logAggregationService.init(this.conf);
     logAggregationService.start();
     logAggregationService.start();
 
 
@@ -237,7 +239,8 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     
     
     LogAggregationService logAggregationService =
     LogAggregationService logAggregationService =
-        new LogAggregationService(dispatcher, this.context, this.delSrvc);
+        new LogAggregationService(dispatcher, this.context, this.delSrvc,
+                                  super.dirsHandler);
     logAggregationService.init(this.conf);
     logAggregationService.init(this.conf);
     logAggregationService.start();
     logAggregationService.start();
 
 

+ 12 - 4
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/loghandler/TestNonAggregatingLogHandler.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy;
 import org.apache.hadoop.yarn.logaggregation.ContainerLogsRetentionPolicy;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
 import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
@@ -74,13 +75,16 @@ public class TestNonAggregatingLogHandler {
     EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
     EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
 
 
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
+
     ApplicationId appId1 = BuilderUtils.newApplicationId(1234, 1);
     ApplicationId appId1 = BuilderUtils.newApplicationId(1234, 1);
     ApplicationAttemptId appAttemptId1 =
     ApplicationAttemptId appAttemptId1 =
         BuilderUtils.newApplicationAttemptId(appId1, 1);
         BuilderUtils.newApplicationAttemptId(appId1, 1);
     ContainerId container11 = BuilderUtils.newContainerId(appAttemptId1, 1);
     ContainerId container11 = BuilderUtils.newContainerId(appAttemptId1, 1);
 
 
     NonAggregatingLogHandler logHandler =
     NonAggregatingLogHandler logHandler =
-        new NonAggregatingLogHandler(dispatcher, delService);
+        new NonAggregatingLogHandler(dispatcher, delService, dirsHandler);
     logHandler.init(conf);
     logHandler.init(conf);
     logHandler.start();
     logHandler.start();
 
 
@@ -146,13 +150,17 @@ public class TestNonAggregatingLogHandler {
     EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
     EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
     dispatcher.register(ApplicationEventType.class, appEventHandler);
 
 
+    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
+    dirsHandler.init(conf);
+
     ApplicationId appId1 = BuilderUtils.newApplicationId(1234, 1);
     ApplicationId appId1 = BuilderUtils.newApplicationId(1234, 1);
     ApplicationAttemptId appAttemptId1 =
     ApplicationAttemptId appAttemptId1 =
         BuilderUtils.newApplicationAttemptId(appId1, 1);
         BuilderUtils.newApplicationAttemptId(appId1, 1);
     ContainerId container11 = BuilderUtils.newContainerId(appAttemptId1, 1);
     ContainerId container11 = BuilderUtils.newContainerId(appAttemptId1, 1);
 
 
     NonAggregatingLogHandler logHandler =
     NonAggregatingLogHandler logHandler =
-        new NonAggregatingLogHandlerWithMockExecutor(dispatcher, delService);
+        new NonAggregatingLogHandlerWithMockExecutor(dispatcher, delService,
+                                                     dirsHandler);
     logHandler.init(conf);
     logHandler.init(conf);
     logHandler.start();
     logHandler.start();
 
 
@@ -182,8 +190,8 @@ public class TestNonAggregatingLogHandler {
     private ScheduledThreadPoolExecutor mockSched;
     private ScheduledThreadPoolExecutor mockSched;
 
 
     public NonAggregatingLogHandlerWithMockExecutor(Dispatcher dispatcher,
     public NonAggregatingLogHandlerWithMockExecutor(Dispatcher dispatcher,
-        DeletionService delService) {
-      super(dispatcher, delService);
+        DeletionService delService, LocalDirsHandlerService dirsHandler) {
+      super(dispatcher, delService, dirsHandler);
     }
     }
 
 
     @Override
     @Override

+ 25 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java

@@ -27,6 +27,7 @@ import java.io.IOException;
 import java.io.Writer;
 import java.io.Writer;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -37,6 +38,8 @@ import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
 import org.apache.hadoop.yarn.server.nodemanager.ResourceView;
 import org.apache.hadoop.yarn.server.nodemanager.ResourceView;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
@@ -47,6 +50,7 @@ import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.util.BuilderUtils;
 import org.apache.hadoop.yarn.util.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.Test;
 
 
@@ -54,10 +58,19 @@ public class TestNMWebServer {
 
 
   private static final File testRootDir = new File("target",
   private static final File testRootDir = new File("target",
       TestNMWebServer.class.getSimpleName());
       TestNMWebServer.class.getSimpleName());
+  private static File testLogDir = new File("target",
+      TestNMWebServer.class.getSimpleName() + "LogDir");
 
 
   @Before
   @Before
   public void setup() {
   public void setup() {
     testRootDir.mkdirs();
     testRootDir.mkdirs();
+    testLogDir.mkdir(); 
+  }
+
+  @After
+  public void tearDown() {
+    FileUtil.fullyDelete(testRootDir);
+    FileUtil.fullyDelete(testLogDir);
   }
   }
 
 
   @Test
   @Test
@@ -74,9 +87,14 @@ public class TestNMWebServer {
       }
       }
     };
     };
     Configuration conf = new Configuration();
     Configuration conf = new Configuration();
-    WebServer server = new WebServer(nmContext, resourceView,
-        new ApplicationACLsManager(conf));
     conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
     conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
+    conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
+    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService();
+    healthChecker.init(conf);
+    LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
+
+    WebServer server = new WebServer(nmContext, resourceView,
+        new ApplicationACLsManager(conf), dirsHandler);
     server.init(conf);
     server.init(conf);
     server.start();
     server.start();
 
 
@@ -119,20 +137,20 @@ public class TestNMWebServer {
           containerId.getApplicationAttemptId().getApplicationId();
           containerId.getApplicationAttemptId().getApplicationId();
       nmContext.getApplications().get(applicationId).getContainers()
       nmContext.getApplications().get(applicationId).getContainers()
           .put(containerId, container);
           .put(containerId, container);
-      writeContainerLogs(conf, nmContext, containerId);
+      writeContainerLogs(nmContext, containerId, dirsHandler);
 
 
     }
     }
     // TODO: Pull logs and test contents.
     // TODO: Pull logs and test contents.
 //    Thread.sleep(1000000);
 //    Thread.sleep(1000000);
   }
   }
 
 
-  private void writeContainerLogs(Configuration conf, Context nmContext,
-      ContainerId containerId)
+  private void writeContainerLogs(Context nmContext,
+      ContainerId containerId, LocalDirsHandlerService dirsHandler)
         throws IOException {
         throws IOException {
     // ContainerLogDir should be created
     // ContainerLogDir should be created
     File containerLogDir =
     File containerLogDir =
-        ContainerLogsPage.ContainersLogsBlock.getContainerLogDirs(conf,
-            containerId).get(0);
+        ContainerLogsPage.ContainersLogsBlock.getContainerLogDirs(containerId,
+            dirsHandler).get(0);
     containerLogDir.mkdirs();
     containerLogDir.mkdirs();
     for (String fileType : new String[] { "stdout", "stderr", "syslog" }) {
     for (String fileType : new String[] { "stdout", "stderr", "syslog" }) {
       Writer writer = new FileWriter(new File(containerLogDir, fileType));
       Writer writer = new FileWriter(new File(containerLogDir, fileType));

+ 43 - 20
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java

@@ -23,7 +23,6 @@ import java.io.IOException;
 
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.NodeHealthCheckerService;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
@@ -41,6 +40,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
 import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
@@ -51,7 +51,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.CompositeService;
 import org.apache.hadoop.yarn.service.CompositeService;
-import org.apache.hadoop.yarn.service.Service.STATE;
 
 
 public class MiniYARNCluster extends CompositeService {
 public class MiniYARNCluster extends CompositeService {
 
 
@@ -69,13 +68,23 @@ public class MiniYARNCluster extends CompositeService {
   
   
   private File testWorkDir;
   private File testWorkDir;
 
 
-  public MiniYARNCluster(String testName) {
-    //default number of nodeManagers = 1
-    this(testName, 1);
-  }
+  // Number of nm-local-dirs per nodemanager
+  private int numLocalDirs;
+  // Number of nm-log-dirs per nodemanager
+  private int numLogDirs;
+
+  /**
+   * @param testName name of the test
+   * @param noOfNodeManagers the number of node managers in the cluster
+   * @param numLocalDirs the number of nm-local-dirs per nodemanager
+   * @param numLogDirs the number of nm-log-dirs per nodemanager
+   */
+  public MiniYARNCluster(String testName, int noOfNodeManagers,
+                         int numLocalDirs, int numLogDirs) {
 
 
-  public MiniYARNCluster(String testName, int noOfNodeManagers) {
     super(testName);
     super(testName);
+    this.numLocalDirs = numLocalDirs;
+    this.numLogDirs = numLogDirs;
     this.testWorkDir = new File("target", testName);
     this.testWorkDir = new File("target", testName);
     try {
     try {
       FileContext.getLocalFSFileContext().delete(
       FileContext.getLocalFSFileContext().delete(
@@ -166,25 +175,39 @@ public class MiniYARNCluster extends CompositeService {
       super.init(config);                                                        
       super.init(config);                                                        
     }                                                                            
     }                                                                            
 
 
+    /**
+     * Create local/log directories
+     * @param dirType type of directories i.e. local dirs or log dirs 
+     * @param numDirs number of directories
+     * @return the created directories as a comma delimited String
+     */
+    private String prepareDirs(String dirType, int numDirs) {
+      File []dirs = new File[numDirs];
+      String dirsString = "";
+      for (int i = 0; i < numDirs; i++) {
+        dirs[i]= new File(testWorkDir, MiniYARNCluster.this.getName()
+            + "-" + dirType + "Dir-nm-" + index + "_" + i);
+        dirs[i].mkdir();
+        LOG.info("Created " + dirType + "Dir in " + dirs[i].getAbsolutePath());
+        String delimiter = (i > 0) ? "," : "";
+        dirsString = dirsString.concat(delimiter + dirs[i].getAbsolutePath());
+      }
+      return dirsString;
+    }
+
     public synchronized void start() {
     public synchronized void start() {
       try {
       try {
-        File localDir = new File(testWorkDir, MiniYARNCluster.this.getName()
-            + "-localDir-nm-" + index);
-        localDir.mkdir();
-        LOG.info("Created localDir in " + localDir.getAbsolutePath());
-        getConfig().set(YarnConfiguration.NM_LOCAL_DIRS,
-            localDir.getAbsolutePath());
-        File logDir =
-            new File(testWorkDir, MiniYARNCluster.this.getName()
-                + "-logDir-nm-" + index);
+        // create nm-local-dirs and configure them for the nodemanager
+        String localDirsString = prepareDirs("local", numLocalDirs);
+        getConfig().set(YarnConfiguration.NM_LOCAL_DIRS, localDirsString);
+        // create nm-log-dirs and configure them for the nodemanager
+        String logDirsString = prepareDirs("log", numLogDirs);
+        getConfig().set(YarnConfiguration.NM_LOG_DIRS, logDirsString);
+
         File remoteLogDir =
         File remoteLogDir =
             new File(testWorkDir, MiniYARNCluster.this.getName()
             new File(testWorkDir, MiniYARNCluster.this.getName()
                 + "-remoteLogDir-nm-" + index);
                 + "-remoteLogDir-nm-" + index);
-        logDir.mkdir();
         remoteLogDir.mkdir();
         remoteLogDir.mkdir();
-        LOG.info("Created logDir in " + logDir.getAbsolutePath());
-        getConfig().set(YarnConfiguration.NM_LOG_DIRS,
-            logDir.getAbsolutePath());
         getConfig().set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
         getConfig().set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
             	remoteLogDir.getAbsolutePath());
             	remoteLogDir.getAbsolutePath());
         // By default AM + 2 containers
         // By default AM + 2 containers

+ 1 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java

@@ -117,7 +117,7 @@ public class TestContainerManagerSecurity {
     conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 100000L);
     conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 100000L);
     UserGroupInformation.setConfiguration(conf);
     UserGroupInformation.setConfiguration(conf);
     yarnCluster = new MiniYARNCluster(TestContainerManagerSecurity.class
     yarnCluster = new MiniYARNCluster(TestContainerManagerSecurity.class
-        .getName());
+        .getName(), 1, 1, 1);
     yarnCluster.init(conf);
     yarnCluster.init(conf);
     yarnCluster.start();
     yarnCluster.start();
   }
   }

+ 247 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java

@@ -0,0 +1,247 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.MiniYARNCluster;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import junit.framework.Assert;
+
+/**
+ * Verify if NodeManager's in-memory good local dirs list and good log dirs list
+ * get updated properly when disks(nm-local-dirs and nm-log-dirs) fail. Also
+ * verify if the overall health status of the node gets updated properly when
+ * specified percentage of disks fail.
+ */
+public class TestDiskFailures {
+
+  private static final Log LOG = LogFactory.getLog(TestDiskFailures.class);
+
+  private static final long DISK_HEALTH_CHECK_INTERVAL = 1000;//1 sec
+
+  private static FileContext localFS = null;
+  private static final File testDir = new File("target",
+      TestDiskFailures.class.getName()).getAbsoluteFile();
+  private static final File localFSDirBase = new File(testDir,
+      TestDiskFailures.class.getName() + "-localDir");
+  private static final int numLocalDirs = 4;
+  private static final int numLogDirs = 4;
+
+  private static MiniYARNCluster yarnCluster;
+  LocalDirsHandlerService dirsHandler;
+
+  @BeforeClass
+  public static void setup() throws AccessControlException,
+      FileNotFoundException, UnsupportedFileSystemException, IOException {
+    localFS = FileContext.getLocalFSFileContext();
+    localFS.delete(new Path(localFSDirBase.getAbsolutePath()), true);
+    localFSDirBase.mkdirs();
+    // Do not start cluster here
+  }
+
+  @AfterClass
+  public static void teardown() {
+    if (yarnCluster != null) {
+      yarnCluster.stop();
+      yarnCluster = null;
+    }
+    FileUtil.fullyDelete(localFSDirBase);
+  }
+
+  /**
+   * Make local-dirs fail/inaccessible and verify if NodeManager can
+   * recognize the disk failures properly and can update the list of
+   * local-dirs accordingly with good disks. Also verify the overall
+   * health status of the node.
+   * @throws IOException
+   */
+  @Test
+  public void testLocalDirsFailures() throws IOException {
+    testDirsFailures(true);
+  }
+
+  /**
+   * Make log-dirs fail/inaccessible and verify if NodeManager can
+   * recognize the disk failures properly and can update the list of
+   * log-dirs accordingly with good disks. Also verify the overall health
+   * status of the node.
+   * @throws IOException
+   */  
+  @Test
+  public void testLogDirsFailures() throws IOException {
+    testDirsFailures(false);
+  }
+
+  private void testDirsFailures(boolean localORLogDirs) throws IOException {
+    String dirType = localORLogDirs ? "local" : "log";
+    String dirsProperty = localORLogDirs ? YarnConfiguration.NM_LOCAL_DIRS
+                                         : YarnConfiguration.NM_LOG_DIRS;
+
+    Configuration conf = new Configuration();
+    // set disk health check interval to a small value (say 1 sec).
+    conf.setLong(YarnConfiguration.NM_DISK_HEALTH_CHECK_INTERVAL_MS,
+                 DISK_HEALTH_CHECK_INTERVAL);
+
+    // If 2 out of the total 4 local-dirs fail OR if 2 Out of the total 4
+    // log-dirs fail, then the node's health status should become unhealthy.
+    conf.setFloat(YarnConfiguration.NM_MIN_HEALTHY_DISKS_FRACTION, 0.60F);
+
+    if (yarnCluster != null) {
+      yarnCluster.stop();
+      FileUtil.fullyDelete(localFSDirBase);
+      localFSDirBase.mkdirs();
+    }
+    LOG.info("Starting up YARN cluster");
+    yarnCluster = new MiniYARNCluster(TestDiskFailures.class.getName(),
+        1, numLocalDirs, numLogDirs);
+    yarnCluster.init(conf);
+    yarnCluster.start();
+
+    NodeManager nm = yarnCluster.getNodeManager(0);
+    LOG.info("Configured nm-" + dirType + "-dirs="
+             + nm.getConfig().get(dirsProperty));
+    dirsHandler = nm.getNodeHealthChecker().getDiskHandler();
+    List<String> list = localORLogDirs ? dirsHandler.getLocalDirs()
+                                       : dirsHandler.getLogDirs();
+    String[] dirs = list.toArray(new String[list.size()]);
+    Assert.assertEquals("Number of nm-" + dirType + "-dirs is wrong.",
+                        numLocalDirs, dirs.length);
+    String expectedDirs = StringUtils.join(",", list);
+    // validate the health of disks initially
+    verifyDisksHealth(localORLogDirs, expectedDirs, true);
+
+    // Make 1 nm-local-dir/nm-log-dir fail and verify that the nodemanager can
+    // identify the disk failure(s) and can update the list of good dirs.
+    prepareDirToFail(dirs[2]);
+    expectedDirs = dirs[0] + "," + dirs[1] + ","
+                                 + dirs[3];
+    verifyDisksHealth(localORLogDirs, expectedDirs, true);
+
+    // Now, make 1 more nm-local-dir/nm-log-dir fail and verify if "the
+    // nodemanager can identify the disk failures and can update the list of
+    // good nm-local-dirs/nm-log-dirs and can update the overall health status
+    // of the node to unhealthy".
+    prepareDirToFail(dirs[0]);
+    expectedDirs = dirs[1] + "," + dirs[3];
+    verifyDisksHealth(localORLogDirs, expectedDirs, false);
+
+    // Fail the remaining 2 local-dirs/log-dirs and verify if NM remains with
+    // empty list of local-dirs/log-dirs and the overall health status is
+    // unhealthy.
+    prepareDirToFail(dirs[1]);
+    prepareDirToFail(dirs[3]);
+    expectedDirs = "";
+    verifyDisksHealth(localORLogDirs, expectedDirs, false);
+  }
+
+  /**
+   * Wait for the NodeManager to go for the disk-health-check at least once.
+   */
+  private void waitForDiskHealthCheck() {
+    long lastDisksCheckTime = dirsHandler.getLastDisksCheckTime();
+    long time = lastDisksCheckTime;
+    for (int i = 0; i < 10 && (time <= lastDisksCheckTime); i++) {
+      try {
+        Thread.sleep(1000);
+      } catch(InterruptedException e) {
+        LOG.error(
+            "Interrupted while waiting for NodeManager's disk health check.");
+      }
+      time = dirsHandler.getLastDisksCheckTime();
+    }
+  }
+
+  /**
+   * Verify if the NodeManager could identify disk failures.
+   * @param localORLogDirs <em>true</em> represent nm-local-dirs and <em>false
+   *                       </em> means nm-log-dirs
+   * @param expectedDirs expected nm-local-dirs/nm-log-dirs as a string
+   * @param isHealthy <em>true</em> if the overall node should be healthy
+   */
+  private void verifyDisksHealth(boolean localORLogDirs, String expectedDirs,
+      boolean isHealthy) {
+    // Wait for the NodeManager to identify disk failures.
+    waitForDiskHealthCheck();
+
+    List<String> list = localORLogDirs ? dirsHandler.getLocalDirs()
+                                       : dirsHandler.getLogDirs();
+    String seenDirs = StringUtils.join(",", list);
+    LOG.info("ExpectedDirs=" + expectedDirs);
+    LOG.info("SeenDirs=" + seenDirs);
+    Assert.assertTrue("NodeManager could not identify disk failure.",
+                      expectedDirs.equals(seenDirs));
+
+    Assert.assertEquals("Node's health in terms of disks is wrong",
+                        isHealthy, dirsHandler.areDisksHealthy());
+    for (int i = 0; i < 10; i++) {
+      Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
+                              .getRMNodes().values().iterator();
+      if (iter.next().getNodeHealthStatus().getIsNodeHealthy() == isHealthy) {
+        break;
+      }
+      // wait for the node health info to go to RM
+      try {
+        Thread.sleep(1000);
+      } catch(InterruptedException e) {
+        LOG.error("Interrupted while waiting for NM->RM heartbeat.");
+      }
+    }
+    Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
+                            .getRMNodes().values().iterator();
+    Assert.assertEquals("RM is not updated with the health status of a node",
+        isHealthy, iter.next().getNodeHealthStatus().getIsNodeHealthy());
+  }
+
+  /**
+   * Prepare directory for a failure: Replace the given directory on the
+   * local FileSystem with a regular file with the same name.
+   * This would cause failure of creation of directory in DiskChecker.checkDir()
+   * with the same name.
+   * @param dir the directory to be failed
+   * @throws IOException 
+   */
+  private void prepareDirToFail(String dir) throws IOException {
+    File file = new File(dir);
+    FileUtil.fullyDelete(file);
+    file.createNewFile();
+    LOG.info("Prepared " + dir + " to fail.");
+  }
+}

+ 9 - 15
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm

@@ -398,6 +398,15 @@ Hadoop MapReduce Next Generation - Cluster Setup
 | | | Timeout for health script execution. |
 | | | Timeout for health script execution. |
 *-------------------------+-------------------------+------------------------+
 *-------------------------+-------------------------+------------------------+
 
 
+    The health checker script is not supposed to give ERROR if only some of the
+    local disks become bad. NodeManager has the ability to periodically check
+    the health of the local disks (specifically checks nodemanager-local-dirs
+    and nodemanager-log-dirs) and, after reaching the threshold of number of
+    bad directories based on the value set for the config property
+    yarn.nodemanager.disk-health-checker.min-healthy-disks, the whole node is
+    marked unhealthy. The boot disk is either raided or a failure in the boot
+    disk is identified by the health checker script.
+
     * {Slaves file}
     * {Slaves file}
       
       
     Typically you choose one machine in the cluster to act as the NameNode and 
     Typically you choose one machine in the cluster to act as the NameNode and 
@@ -874,13 +883,6 @@ KVNO Timestamp         Principal
 *-------------------------+-------------------------+------------------------+
 *-------------------------+-------------------------+------------------------+
 || Parameter              || Value                  || Notes                 |
 || Parameter              || Value                  || Notes                 |
 *-------------------------+-------------------------+------------------------+
 *-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.local-dirs>>> | |
-| | Comma-separated list of NodeManager local directories. | |
-| | | Paths to NodeManager local directories. Should be same as the value |
-| | | which was provided to key in <<<conf/yarn-site.xml>>>. This is |
-| | | required to validate paths passed to the setuid executable in order |
-| | to prevent arbitrary paths being passed to it. |
-*-------------------------+-------------------------+------------------------+
 | <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
 | <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
 | | | Unix group of the NodeManager. The group owner of the |
 | | | Unix group of the NodeManager. The group owner of the |
 | | |<container-executor> binary should be this group. Should be same as the |
 | | |<container-executor> binary should be this group. Should be same as the |
@@ -888,14 +890,6 @@ KVNO Timestamp         Principal
 | | | required for validating the secure access of the <container-executor> |
 | | | required for validating the secure access of the <container-executor> |
 | | | binary. |        
 | | | binary. |        
 *-------------------------+-------------------------+------------------------+
 *-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.log-dirs>>> | |
-| | Comma-separated list of NodeManager log directories. | |
-| | | Paths to NodeManager log directories. Should be same as the value |
-| | | which was provided to key in <<<conf/yarn-site.xml>>>. This is |
-| | | required to set proper permissions on the log files so that they can |
-| | | be written to by the user's containers and read by the NodeManager for |
-| | | <log aggregation>. |
-*-------------------------+-------------------------+------------------------+
 | <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
 | <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
 *-------------------------+-------------------------+------------------------+
 *-------------------------+-------------------------+------------------------+
 | <<<min.user.id>>> | 1000 | Prevent other super-users. |      
 | <<<min.user.id>>> | 1000 | Prevent other super-users. |      

+ 3 - 3
hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobInProgress.java

@@ -2685,7 +2685,7 @@ public class JobInProgress {
       MapAttemptFinishedEvent mfe = new MapAttemptFinishedEvent(
       MapAttemptFinishedEvent mfe = new MapAttemptFinishedEvent(
           statusAttemptID, taskType, TaskStatus.State.SUCCEEDED.toString(),
           statusAttemptID, taskType, TaskStatus.State.SUCCEEDED.toString(),
           status.getMapFinishTime(),
           status.getMapFinishTime(),
-          status.getFinishTime(),  trackerHostname, "",
+          status.getFinishTime(),  trackerHostname, -1, "",
           status.getStateString(), 
           status.getStateString(), 
           new org.apache.hadoop.mapreduce.Counters(status.getCounters()),
           new org.apache.hadoop.mapreduce.Counters(status.getCounters()),
           tip.getSplits(statusAttemptID).burst()
           tip.getSplits(statusAttemptID).burst()
@@ -2698,7 +2698,7 @@ public class JobInProgress {
           statusAttemptID, taskType, TaskStatus.State.SUCCEEDED.toString(), 
           statusAttemptID, taskType, TaskStatus.State.SUCCEEDED.toString(), 
           status.getShuffleFinishTime(),
           status.getShuffleFinishTime(),
           status.getSortFinishTime(), status.getFinishTime(),
           status.getSortFinishTime(), status.getFinishTime(),
-          trackerHostname, "", status.getStateString(),
+          trackerHostname, -1, "", status.getStateString(),
           new org.apache.hadoop.mapreduce.Counters(status.getCounters()),
           new org.apache.hadoop.mapreduce.Counters(status.getCounters()),
           tip.getSplits(statusAttemptID).burst()
           tip.getSplits(statusAttemptID).burst()
           );
           );
@@ -3208,7 +3208,7 @@ public class JobInProgress {
             (taskid, 
             (taskid, 
              taskType, taskStatus.getRunState().toString(),
              taskType, taskStatus.getRunState().toString(),
              finishTime, 
              finishTime, 
-             taskTrackerHostName, diagInfo,
+             taskTrackerHostName, -1, diagInfo,
              splits.burst());
              splits.burst());
     jobHistory.logEvent(tue, taskid.getJobID());
     jobHistory.logEvent(tue, taskid.getJobID());
         
         

+ 4 - 0
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestCombineOutputCollector.java

@@ -102,6 +102,8 @@ public class TestCombineOutputCollector {
   public void testCustomCollect() throws Throwable {
   public void testCustomCollect() throws Throwable {
     //mock creation
     //mock creation
     TaskReporter mockTaskReporter = mock(TaskReporter.class);
     TaskReporter mockTaskReporter = mock(TaskReporter.class);
+
+    @SuppressWarnings("unchecked")
     Writer<String, Integer> mockWriter = mock(Writer.class);
     Writer<String, Integer> mockWriter = mock(Writer.class);
 
 
     Configuration conf = new Configuration();
     Configuration conf = new Configuration();
@@ -122,6 +124,8 @@ public class TestCombineOutputCollector {
   public void testDefaultCollect() throws Throwable {
   public void testDefaultCollect() throws Throwable {
     //mock creation
     //mock creation
     TaskReporter mockTaskReporter = mock(TaskReporter.class);
     TaskReporter mockTaskReporter = mock(TaskReporter.class);
+
+    @SuppressWarnings("unchecked")
     Writer<String, Integer> mockWriter = mock(Writer.class);
     Writer<String, Integer> mockWriter = mock(Writer.class);
 
 
     Configuration conf = new Configuration();
     Configuration conf = new Configuration();

+ 1 - 1
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEvents.java

@@ -83,7 +83,7 @@ public class TestJobHistoryEvents extends TestCase {
     for (TaskType t : types) {
     for (TaskType t : types) {
       TaskAttemptUnsuccessfulCompletionEvent tauce = 
       TaskAttemptUnsuccessfulCompletionEvent tauce = 
         new TaskAttemptUnsuccessfulCompletionEvent
         new TaskAttemptUnsuccessfulCompletionEvent
-           (id, t, state, 0L, "", "", NULL_SPLITS_ARRAY);
+           (id, t, state, 0L, "", -1, "", NULL_SPLITS_ARRAY);
       assertEquals(expected, tauce.getEventType());
       assertEquals(expected, tauce.getEventType());
     }
     }
   }
   }

+ 2 - 2
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java

@@ -938,12 +938,12 @@ public class TestRumenJobTraces {
                     (TaskAttemptID.forName("attempt_200904211745_0003_m_000004_1"),
                     (TaskAttemptID.forName("attempt_200904211745_0003_m_000004_1"),
                      TaskType.valueOf("MAP"), "STATUS", 1234567890L,
                      TaskType.valueOf("MAP"), "STATUS", 1234567890L,
                      "/194\\.6\\.134\\.80/cluster50262\\.secondleveldomain\\.com",
                      "/194\\.6\\.134\\.80/cluster50262\\.secondleveldomain\\.com",
-                     "MACHINE_EXPLODED", splits));
+                     -1, "MACHINE_EXPLODED", splits));
     subject.process(new TaskAttemptUnsuccessfulCompletionEvent
     subject.process(new TaskAttemptUnsuccessfulCompletionEvent
                     (TaskAttemptID.forName("attempt_200904211745_0003_m_000004_2"),
                     (TaskAttemptID.forName("attempt_200904211745_0003_m_000004_2"),
                      TaskType.valueOf("MAP"), "STATUS", 1234567890L,
                      TaskType.valueOf("MAP"), "STATUS", 1234567890L,
                      "/194\\.6\\.134\\.80/cluster50263\\.secondleveldomain\\.com",
                      "/194\\.6\\.134\\.80/cluster50263\\.secondleveldomain\\.com",
-                     "MACHINE_EXPLODED", splits));
+                     -1, "MACHINE_EXPLODED", splits));
     subject.process(new TaskStartedEvent(TaskID
     subject.process(new TaskStartedEvent(TaskID
         .forName("task_200904211745_0003_m_000004"), 1234567890L, TaskType
         .forName("task_200904211745_0003_m_000004"), 1234567890L, TaskType
         .valueOf("MAP"),
         .valueOf("MAP"),

+ 1 - 1
hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/MapAttempt20LineHistoryEventEmitter.java

@@ -73,7 +73,7 @@ public class MapAttempt20LineHistoryEventEmitter extends
               that.originalTaskType, status,
               that.originalTaskType, status,
              Long.parseLong(finishTime),
              Long.parseLong(finishTime),
              Long.parseLong(finishTime),
              Long.parseLong(finishTime),
-             hostName, null, state, maybeParseCounters(counters),
+             hostName, -1, null, state, maybeParseCounters(counters),
              null);
              null);
         }
         }
       }
       }

+ 1 - 1
hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/ReduceAttempt20LineHistoryEventEmitter.java

@@ -77,7 +77,7 @@ public class ReduceAttempt20LineHistoryEventEmitter
              Long.parseLong(shuffleFinish),
              Long.parseLong(shuffleFinish),
              Long.parseLong(sortFinish),
              Long.parseLong(sortFinish),
              Long.parseLong(finishTime),
              Long.parseLong(finishTime),
-             hostName, null,
+             hostName, -1, null,
              state, maybeParseCounters(counters),
              state, maybeParseCounters(counters),
              null);
              null);
         }
         }

+ 1 - 1
hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TaskAttempt20LineEventEmitter.java

@@ -141,7 +141,7 @@ public abstract class TaskAttempt20LineEventEmitter extends HistoryEventEmitter
         return new TaskAttemptUnsuccessfulCompletionEvent
         return new TaskAttemptUnsuccessfulCompletionEvent
           (taskAttemptID,
           (taskAttemptID,
            that.originalTaskType, status, Long.parseLong(finishTime),
            that.originalTaskType, status, Long.parseLong(finishTime),
-           hostName, error, null);
+           hostName, -1, error, null);
       }
       }
 
 
       return null;
       return null;

+ 1 - 1
pom.xml

@@ -123,7 +123,7 @@
         <configuration>
         <configuration>
           <rules>
           <rules>
             <requireMavenVersion>
             <requireMavenVersion>
-              <version>[3.0.0,)</version>
+              <version>[3.0.2,)</version>
             </requireMavenVersion>
             </requireMavenVersion>
             <requireJavaVersion>
             <requireJavaVersion>
               <version>1.6</version>
               <version>1.6</version>