
Merging r1562962 through r1565513 from trunk

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5698@1565515 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao, 11 years ago
Commit: 11f7ae9ac2
100 files changed, 4512 insertions(+) and 976 deletions(-). Per-file counts below are insertions / deletions.
  1. 124 0
      dev-support/create-release.sh
  2. 35 14
      dev-support/test-patch.sh
  3. 1 0
      hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/InterfaceStability.java
  4. 30 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  5. 2 0
      hadoop-common-project/hadoop-common/pom.xml
  6. 2 0
      hadoop-common-project/hadoop-common/src/CMakeLists.txt
  7. 9 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java
  8. 8 7
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
  9. 6 5
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpConfig.java
  10. 10 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java
  11. 90 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.java
  12. 19 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java
  13. 8 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
  14. 50 134
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java
  15. 478 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java
  16. 11 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java
  17. 20 9
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java
  18. 2 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java
  19. 125 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java
  20. 54 1
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c
  21. 162 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.c
  22. 247 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocketWatcher.c
  23. 2 1
      hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm
  24. 7 7
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java
  25. 82 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestSharedFileDescriptorFactory.java
  26. 150 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java
  27. 210 18
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/TestCompositeService.java
  28. 30 22
      hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/IdUserGroup.java
  29. 26 15
      hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/nfs3/TestIdUserGroup.java
  30. 24 8
      hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java
  31. 26 2
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java
  32. 7 5
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
  33. 13 1
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java
  34. 25 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java
  35. 9 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java
  36. 50 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  37. 3 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java
  38. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  39. 8 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  40. 8 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
  41. 80 18
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
  42. 11 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
  43. 18 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java
  44. 302 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/ShortCircuitSharedMemorySegment.java
  45. 12 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java
  46. 21 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  47. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
  48. 11 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
  49. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java
  50. 94 71
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
  51. 5 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
  52. 31 13
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  53. 4 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java
  54. 91 53
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
  55. 167 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
  56. 18 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  57. 0 36
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
  58. 32 7
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  59. 7 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
  60. 2 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java
  61. 4 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
  62. 14 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  63. 28 1
      hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm
  64. 14 7
      hadoop-hdfs-project/hadoop-hdfs/src/site/xdoc/HdfsSnapshots.xml
  65. 85 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java
  66. 86 42
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java
  67. 54 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHttpPolicy.java
  68. 104 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/client/TestShortCircuitSharedMemorySegment.java
  69. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
  70. 2 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java
  71. 71 165
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java
  72. 16 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
  73. 205 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java
  74. 42 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesBlockReportPerStorage.java
  75. 39 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesCombinedBlockReport.java
  76. 4 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeHttpServer.java
  77. 105 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeOptionParsing.java
  78. 5 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java
  79. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestHttpsFileSystem.java
  80. Binary
      hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz
  81. 9 0
      hadoop-mapreduce-project/CHANGES.txt
  82. 3 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
  83. 8 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java
  84. 2 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  85. 9 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
  86. 7 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java
  87. 8 7
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
  88. 1 1
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
  89. 3 3
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
  90. 3 3
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
  91. 75 32
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
  92. 8 6
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
  93. 32 3
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
  94. 92 30
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
  95. 168 168
      hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json
  96. 7 0
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
  97. 69 0
      hadoop-yarn-project/CHANGES.txt
  98. 1 1
      hadoop-yarn-project/hadoop-yarn/bin/yarn
  99. 1 1
      hadoop-yarn-project/hadoop-yarn/bin/yarn.cmd
  100. 38 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ApplicationsRequestScope.java

+ 124 - 0
dev-support/create-release.sh

@@ -0,0 +1,124 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Function to probe the exit code of the script commands,
+# and stop in the case of failure with a contextual error
+# message.
+run() {
+  echo "\$ ${@}"
+  "${@}"
+  exitCode=$?
+  if [[ $exitCode != 0 ]]; then
+    echo
+    echo "Failed! running ${@} in `pwd`"
+    echo
+    exit $exitCode
+  fi
+}
+
+doMD5() {
+  MD5CMD="md5sum"
+  which $MD5CMD
+  if [[ $? != 0 ]]; then
+    MD5CMD="md5"
+  fi
+  run $MD5CMD ${1} > ${1}.md5
+}
+
+# If provided, the created release artifacts will be tagged with it 
+# (use RC#, i.e: RC0). Do not use a label to create the final release 
+# artifact.
+RC_LABEL=$1
+
+# Extract Hadoop version from POM
+HADOOP_VERSION=`cat pom.xml | grep "<version>" | head -1 | sed 's|^ *<version>||' | sed 's|</version>.*$||'`
+
+echo
+echo "*****************************************************************"
+echo
+echo "Hadoop version to create release artifacts: ${HADOOP_VERSION}"
+echo 
+echo "Release Candidate Label: ${RC_LABEL}"
+echo
+echo "*****************************************************************"
+echo
+
+if [[ ! -z ${RC_LABEL} ]]; then
+  RC_LABEL="-${RC_LABEL}"
+fi
+
+# Get Maven command
+if [ -z "$MAVEN_HOME" ]; then
+  MVN=mvn
+else
+  MVN=$MAVEN_HOME/bin/mvn
+fi
+
+ARTIFACTS_DIR="target/artifacts"
+
+# Create staging dir for release artifacts
+
+run mkdir -p ${ARTIFACTS_DIR}
+
+# Create RAT report
+run ${MVN} apache-rat:check
+
+# Create SRC and BIN tarballs for release,
+# Using 'install' goal instead of 'package' so artifacts are available 
+# in the Maven local cache for the site generation
+run ${MVN} install -Pdist,docs,src,native -DskipTests -Dtar
+
+# Create site for release
+run ${MVN} site site:stage -Pdist -Psrc
+run mv target/staging/hadoop-project target/r${HADOOP_VERSION}/
+run cd target/
+run tar czf hadoop-site-${HADOOP_VERSION}.tar.gz r${HADOOP_VERSION}/*
+run cd ..
+
+# Stage RAT report
+find . -name rat.txt | xargs -I% cat % > ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-rat.txt
+
+# Stage CHANGES.txt files
+run cp ./hadoop-common-project/hadoop-common/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-COMMON-${HADOOP_VERSION}${RC_LABEL}.txt
+run cp ./hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-HDFS--${HADOOP_VERSION}${RC_LABEL}.txt
+run cp ./hadoop-mapreduce-project/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-MAPREDUCE-${HADOOP_VERSION}${RC_LABEL}.txt
+run cp ./hadoop-yarn-project/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-YARN-${HADOOP_VERSION}${RC_LABEL}.txt
+
+# Stage BIN tarball
+run mv hadoop-dist/target/hadoop-${HADOOP_VERSION}.tar.gz ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}.tar.gz
+
+# Stage SRC tarball
+run mv hadoop-dist/target/hadoop-${HADOOP_VERSION}-src.tar.gz ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-src.tar.gz
+
+# Stage SITE tarball
+run mv target/hadoop-site-${HADOOP_VERSION}.tar.gz ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-site.tar.gz
+
+# MD5 SRC and BIN tarballs
+doMD5 ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}.tar.gz
+doMD5 ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-src.tar.gz
+
+run cd ${ARTIFACTS_DIR}
+ARTIFACTS_DIR=`pwd`
+echo
+echo "Congratulations, you have successfully built the release"
+echo "artifacts for Apache Hadoop ${HADOOP_VERSION}${RC_LABEL}"
+echo
+echo "The artifacts for this run are available at ${ARTIFACTS_DIR}:"
+run ls -1 ${ARTIFACTS_DIR}
+echo 
+echo "Remember to sign them before staging them on the open"
+echo

+ 35 - 14
dev-support/test-patch.sh

@@ -300,6 +300,17 @@ prebuildWithoutPatch () {
     {color:red}-1 patch{color}.  Trunk compilation may be broken."
     return 1
   fi
+
+  echo "$MVN clean test javadoc:javadoc -DskipTests -Pdocs -D${PROJECT_NAME}PatchProcess > $PATCH_DIR/trunkJavadocWarnings.txt 2>&1"
+  $MVN clean test javadoc:javadoc -DskipTests -Pdocs -D${PROJECT_NAME}PatchProcess > $PATCH_DIR/trunkJavadocWarnings.txt 2>&1
+  if [[ $? != 0 ]] ; then
+    echo "Trunk javadoc compilation is broken?"
+    JIRA_COMMENT="$JIRA_COMMENT
+
+    {color:red}-1 patch{color}.  Trunk compilation may be broken."
+    return 1
+  fi
+
   return 0
 }
 
 
@@ -401,6 +412,11 @@ applyPatch () {
 }
 
 ###############################################################################
+calculateJavadocWarnings() {
+    WARNING_FILE="$1"
+    RET=$(egrep "^[0-9]+ warnings$" "$WARNING_FILE" | awk '{sum+=$1} END {print sum}')
+}
+
 ### Check there are no javadoc warnings
 checkJavadocWarnings () {
   echo ""
@@ -420,24 +436,29 @@ checkJavadocWarnings () {
     (cd hadoop-common-project/hadoop-annotations; $MVN install > /dev/null 2>&1)
   fi
   $MVN clean test javadoc:javadoc -DskipTests -Pdocs -D${PROJECT_NAME}PatchProcess > $PATCH_DIR/patchJavadocWarnings.txt 2>&1
-  javadocWarnings=`$GREP '\[WARNING\]' $PATCH_DIR/patchJavadocWarnings.txt | $AWK '/Javadoc Warnings/,EOF' | $GREP warning | $AWK 'BEGIN {total = 0} {total += 1} END {print total}'`
-  echo ""
-  echo ""
-  echo "There appear to be $javadocWarnings javadoc warnings generated by the patched build."
-
-  #There are 12 warnings that are caused by things that are caused by using sun internal APIs.
-  #There are 2 warnings that are caused by the Apache DS Dn class used in MiniKdc.
-  OK_JAVADOC_WARNINGS=14;
-  ### if current warnings greater than OK_JAVADOC_WARNINGS
-  if [[ $javadocWarnings -ne $OK_JAVADOC_WARNINGS ]] ; then
-    JIRA_COMMENT="$JIRA_COMMENT
+  calculateJavadocWarnings "$PATCH_DIR/trunkJavadocWarnings.txt"
+  numTrunkJavadocWarnings=$RET
+  calculateJavadocWarnings "$PATCH_DIR/patchJavadocWarnings.txt"
+  numPatchJavadocWarnings=$RET
+  grep -i warning "$PATCH_DIR/trunkJavadocWarnings.txt" > "$PATCH_DIR/trunkJavadocWarningsFiltered.txt"
+  grep -i warning "$PATCH_DIR/patchJavadocWarnings.txt" > "$PATCH_DIR/patchJavadocWarningsFiltered.txt"
+  diff -u "$PATCH_DIR/trunkJavadocWarningsFiltered.txt" \
+          "$PATCH_DIR/patchJavadocWarningsFiltered.txt" > \
+          "$PATCH_DIR/diffJavadocWarnings.txt"
+  rm -f "$PATCH_DIR/trunkJavadocWarningsFiltered.txt" "$PATCH_DIR/patchJavadocWarningsFiltered.txt"
+  echo "There appear to be $numTrunkJavadocWarnings javadoc warnings before the patch and $numPatchJavadocWarnings javadoc warnings after applying the patch."
+  if [[ $numTrunkJavadocWarnings != "" && $numPatchJavadocWarnings != "" ]] ; then
+    if [[ $numPatchJavadocWarnings -gt $numTrunkJavadocWarnings ]] ; then
+      JIRA_COMMENT="$JIRA_COMMENT
 
 
-    {color:red}-1 javadoc{color}.  The javadoc tool appears to have generated `expr $(($javadocWarnings-$OK_JAVADOC_WARNINGS))` warning messages."
-    return 1
+    {color:red}-1 javadoc{color}.  The javadoc tool appears to have generated `expr $(($numPatchJavadocWarnings-$numTrunkJavadocWarnings))` warning messages.
+        See $BUILD_URL/artifact/trunk/patchprocess/diffJavadocWarnings.txt for details."
+        return 1
+    fi
   fi
   JIRA_COMMENT="$JIRA_COMMENT
 
 
-    {color:green}+1 javadoc{color}.  The javadoc tool did not generate any warning messages."
+    {color:green}+1 javadoc{color}.  There were no new javadoc warning messages."
   return 0
 }
 
 

+ 1 - 0
hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/InterfaceStability.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
  * <li>Classes that are {@link Private} are to be considered unstable unless
  * a different InterfaceStability annotation states otherwise.</li>
  * <li>Incompatible changes must not be made to classes marked as stable.</li>
+ * </ul>
  */
 @InterfaceAudience.Public
 @InterfaceStability.Evolving

+ 30 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -113,6 +113,11 @@ Trunk (Unreleased)
 
     HADOOP-10177. Create CLI tools for managing keys. (Larry McCay via omalley)
 
 
+    HADOOP-10244. TestKeyShell improperly tests the results of delete (Larry
+    McCay via omalley)
+
+    HADOOP-10325. Improve jenkins javadoc warnings from test-patch.sh (cmccabe)
+
   BUG FIXES
 
     HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -304,10 +309,25 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10139. Update and improve the Single Cluster Setup document.
     (Akira Ajisaka via Arpit Agarwal)
 
 
+    HADOOP-10295. Allow distcp to automatically identify the checksum type of 
+    source files and use it for the target. (jing9 and Laurent Goujon)
+
   OPTIMIZATIONS
 
   BUG FIXES
 
 
+    HADOOP-10320. Javadoc in InterfaceStability.java lacks final </ul>.
+    (René Nyffenegger via cnauroth)
+    
+    HADOOP-10085. CompositeService should allow adding services while being 
+    inited. (Steve Loughran via kasha)
+
+    HADOOP-10327. Trunk windows build broken after HDFS-5746.
+    (Vinay via cnauroth)
+
+    HADOOP-10330. TestFrameDecoder fails if it cannot bind port 12345.
+    (Arpit Agarwal)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -464,6 +484,12 @@ Release 2.3.0 - UNRELEASED
     "rpc.metrics.percentiles.intervals" to core-default.xml.
     "rpc.metrics.percentiles.intervals" to core-default.xml.
     (Akira Ajisaka via wang)
     (Akira Ajisaka via wang)
 
 
+    HADOOP-10317. Rename branch-2.3 release version from 2.4.0-SNAPSHOT
+    to 2.3.0-SNAPSHOT. (wang)
+
+    HADOOP-10313. Script and jenkins job to produce Hadoop release artifacts. 
+    (tucu)
+
   OPTIMIZATIONS
 
     HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who"
@@ -671,6 +697,10 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10310. SaslRpcServer should be initialized even when no secret
     manager present. (atm)
 
 
+    HADOOP-10311. Cleanup vendor names from the code base. (tucu)
+
+    HADOOP-10273. Fix 'mvn site'. (Arpit Agarwal)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-common-project/hadoop-common/pom.xml

@@ -543,6 +543,7 @@
                     <javahClassName>org.apache.hadoop.io.compress.bzip2.Bzip2Decompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.security.JniBasedUnixGroupsMapping</javahClassName>
                     <javahClassName>org.apache.hadoop.io.nativeio.NativeIO</javahClassName>
+                    <javahClassName>org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory</javahClassName>
                     <javahClassName>org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyCompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
@@ -550,6 +551,7 @@
                     <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
                     <javahClassName>org.apache.hadoop.net.unix.DomainSocket</javahClassName>
+                    <javahClassName>org.apache.hadoop.net.unix.DomainSocketWatcher</javahClassName>
                   </javahClassNames>
                   <javahOutputDirectory>${project.build.directory}/native/javah</javahOutputDirectory>
                 </configuration>

+ 2 - 0
hadoop-common-project/hadoop-common/src/CMakeLists.txt

@@ -178,7 +178,9 @@ add_dual_library(hadoop
     ${D}/io/nativeio/NativeIO.c
     ${D}/io/nativeio/errno_enum.c
     ${D}/io/nativeio/file_descriptor.c
+    ${D}/io/nativeio/SharedFileDescriptorFactory.c
     ${D}/net/unix/DomainSocket.c
+    ${D}/net/unix/DomainSocketWatcher.c
     ${D}/security/JniBasedUnixGroupsMapping.c
     ${D}/security/JniBasedUnixGroupsNetgroupMapping.c
     ${D}/security/hadoop_group_info.c

+ 9 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java

@@ -21,21 +21,26 @@ import java.util.Arrays;
 
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.io.Writable;
 
 /** An abstract class representing file checksums for files. */
 @InterfaceAudience.Public
 @InterfaceStability.Stable
 public abstract class FileChecksum implements Writable {
-  /** The checksum algorithm name */ 
+  /** The checksum algorithm name */
   public abstract String getAlgorithmName();
 
-  /** The length of the checksum in bytes */ 
+  /** The length of the checksum in bytes */
   public abstract int getLength();
 
-  /** The value of the checksum in bytes */ 
+  /** The value of the checksum in bytes */
   public abstract byte[] getBytes();
 
+  public ChecksumOpt getChecksumOpt() {
+    return null;
+  }
+
   /** Return true if both the algorithms and the values are the same. */
   @Override
   public boolean equals(Object other) {
@@ -50,7 +55,7 @@ public abstract class FileChecksum implements Writable {
     return this.getAlgorithmName().equals(that.getAlgorithmName())
       && Arrays.equals(this.getBytes(), that.getBytes());
   }
-  
+
   @Override
   public int hashCode() {
     return getAlgorithmName().hashCode() ^ Arrays.hashCode(getBytes());

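Editor's note: the new FileChecksum#getChecksumOpt() hook (default null, overridden by MD5MD5CRC32FileChecksum below) lets callers discover a source file's checksum configuration. A minimal hedged sketch of how a copy tool might consume it; the class and helper names here are illustrative, not part of this commit.

    import java.io.IOException;

    import org.apache.hadoop.fs.FileChecksum;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Options.ChecksumOpt;
    import org.apache.hadoop.fs.Path;

    public class ChecksumOptProbe {
      /** Return the source file's checksum options, or null if they are unknown. */
      static ChecksumOpt sourceChecksumOpt(FileSystem srcFs, Path src) throws IOException {
        FileChecksum checksum = srcFs.getFileChecksum(src);
        // The FileChecksum base class returns null, so a null result simply means
        // the file system does not expose its checksum configuration.
        return (checksum == null) ? null : checksum.getChecksumOpt();
      }
    }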
+ 8 - 7
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java

@@ -56,7 +56,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
     this.crcPerBlock = crcPerBlock;
     this.md5 = md5;
   }
-  
+
   @Override
   public String getAlgorithmName() {
     return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
@@ -73,10 +73,10 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
 
 
     throw new IOException("Unknown checksum type in " + algorithm);
   }
- 
+
   @Override
   public int getLength() {return LENGTH;}
- 
+
   @Override
   public byte[] getBytes() {
     return WritableUtils.toByteArray(this);
@@ -88,6 +88,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
     return DataChecksum.Type.CRC32;
   }
 
+  @Override
   public ChecksumOpt getChecksumOpt() {
     return new ChecksumOpt(getCrcType(), bytesPerCRC);
   }
@@ -98,12 +99,12 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
     crcPerBlock = in.readLong();
     md5 = MD5Hash.read(in);
   }
- 
+
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(bytesPerCRC);
     out.writeLong(crcPerBlock);
-    md5.write(out);    
+    md5.write(out);
   }
 
   /** Write that object to xml output. */
@@ -157,11 +158,11 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
       }
     } catch (Exception e) {
       throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
-          + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType 
+          + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
           + ", md5=" + md5, e);
     }
   }
- 
+
   @Override
   public String toString() {
     return getAlgorithmName() + ":" + md5;

+ 6 - 5
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpConfig.java

@@ -34,13 +34,14 @@ public class HttpConfig {
     HTTPS_ONLY,
     HTTP_AND_HTTPS;
 
+    private static final Policy[] VALUES = values();
     public static Policy fromString(String value) {
-      if (HTTPS_ONLY.name().equalsIgnoreCase(value)) {
-        return HTTPS_ONLY;
-      } else if (HTTP_AND_HTTPS.name().equalsIgnoreCase(value)) {
-        return HTTP_AND_HTTPS;
+      for (Policy p : VALUES) {
+        if (p.name().equalsIgnoreCase(value)) {
+          return p;
+        }
       }
-      return HTTP_ONLY;
+      return null;
     }
 
     public boolean isHttpEnabled() {

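Editor's note: after this change Policy.fromString() no longer silently falls back to HTTP_ONLY; unrecognized values yield null. A small hedged sketch of the caller-side pattern this implies (the helper below is illustrative, not from this commit).

    import org.apache.hadoop.http.HttpConfig;

    public class PolicyLookup {
      static HttpConfig.Policy resolveOrFail(String configured) {
        HttpConfig.Policy policy = HttpConfig.Policy.fromString(configured);
        if (policy == null) {
          // Callers must now supply their own default or reject bad input.
          throw new IllegalArgumentException("Unrecognized HTTP policy: " + configured);
        }
        return policy;
      }
    }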
+ 10 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java

@@ -487,6 +487,16 @@ public class NativeIO {
       new ConcurrentHashMap<Integer, CachedName>();
 
     private enum IdCache { USER, GROUP }
+
+    public final static int MMAP_PROT_READ = 0x1;
+    public final static int MMAP_PROT_WRITE = 0x2;
+    public final static int MMAP_PROT_EXEC = 0x4;
+
+    public static native long mmap(FileDescriptor fd, int prot,
+        boolean shared, long length) throws IOException;
+
+    public static native void munmap(long addr, long length)
+        throws IOException;
   }
 
   private static boolean workaroundNonThreadSafePasswdCalls = false;

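Editor's note: a hedged sketch of the new mmap/munmap natives added above. It assumes they live in the NativeIO.POSIX inner class (as the surrounding hunk context suggests) and that the native library is loaded; the file name and length are example values only.

    import java.io.IOException;
    import java.io.RandomAccessFile;

    import org.apache.hadoop.io.nativeio.NativeIO;

    public class MmapSketch {
      public static void main(String[] args) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile("/tmp/example.dat", "rw")) {
          long length = 4096;
          raf.setLength(length);
          // Map the file read/write into this process; the return value is the
          // raw address of the mapping.
          long addr = NativeIO.POSIX.mmap(raf.getFD(),
              NativeIO.POSIX.MMAP_PROT_READ | NativeIO.POSIX.MMAP_PROT_WRITE,
              true /* shared */, length);
          try {
            // ... access the mapping through other native helpers ...
          } finally {
            NativeIO.POSIX.munmap(addr, length);
          }
        }
      }
    }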
+ 90 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.java

@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.nativeio;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.FileDescriptor;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A factory for creating shared file descriptors inside a given directory.
+ * Typically, the directory will be /dev/shm or /tmp.
+ *
+ * We will hand out file descriptors that correspond to unlinked files residing
+ * in that directory.  These file descriptors are suitable for sharing across
+ * multiple processes and are both readable and writable.
+ *
+ * Because we unlink the temporary files right after creating them, a JVM crash
+ * usually does not leave behind any temporary files in the directory.  However,
+ * it may happen that we crash right after creating the file and before
+ * unlinking it.  In the constructor, we attempt to clean up after any such
+ * remnants by trying to unlink any temporary files created by previous
+ * SharedFileDescriptorFactory instances that also used our prefix.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class SharedFileDescriptorFactory {
+  private final String prefix;
+  private final String path;
+
+  /**
+   * Create a SharedFileDescriptorFactory.
+   *
+   * @param prefix    Prefix to add to all file names we use.
+   * @param path      Path to use.
+   */
+  public SharedFileDescriptorFactory(String prefix, String path)
+      throws IOException {
+    Preconditions.checkArgument(NativeIO.isAvailable());
+    Preconditions.checkArgument(SystemUtils.IS_OS_UNIX);
+    this.prefix = prefix;
+    this.path = path;
+    deleteStaleTemporaryFiles0(prefix, path);
+  }
+
+  /**
+   * Create a shared file descriptor which will be both readable and writable.
+   *
+   * @param length         The starting file length.
+   *
+   * @return               The file descriptor, wrapped in a FileInputStream.
+   * @throws IOException   If there was an I/O or configuration error creating
+   *                       the descriptor.
+   */
+  public FileInputStream createDescriptor(int length) throws IOException {
+    return new FileInputStream(createDescriptor0(prefix, path, length));
+  }
+
+  /**
+   * Delete temporary files in the directory, NOT following symlinks.
+   */
+  private static native void deleteStaleTemporaryFiles0(String prefix,
+      String path) throws IOException;
+
+  /**
+   * Create a file with O_EXCL, and then resize it to the desired size.
+   */
+  private static native FileDescriptor createDescriptor0(String prefix,
+      String path, int length) throws IOException;
+}

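Editor's note: a brief hedged usage sketch of the factory added above. The prefix and directory are example values; /dev/shm is simply the typical location mentioned in the class javadoc.

    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory;

    public class SharedFdSketch {
      public static void main(String[] args) throws IOException {
        // Requires native code (NativeIO) and a UNIX-like OS, per the constructor checks.
        SharedFileDescriptorFactory factory =
            new SharedFileDescriptorFactory("demo_", "/dev/shm");
        try (FileInputStream stream = factory.createDescriptor(8192)) {
          // The descriptor refers to an already-unlinked file: it is readable and
          // writable, and can be shared with another process (for example via
          // DomainSocket#sendFileDescriptors) or mapped with the new NativeIO mmap.
          System.out.println("shared fd valid: " + stream.getFD().valid());
        }
      }
    }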
+ 19 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java

@@ -151,6 +151,13 @@ public class RetryPolicies {
         delayMillis, maxDelayBase);
   }
   
+  public static final RetryPolicy failoverOnNetworkException(
+      RetryPolicy fallbackPolicy, int maxFailovers, int maxRetries,
+      long delayMillis, long maxDelayBase) {
+    return new FailoverOnNetworkExceptionRetry(fallbackPolicy, maxFailovers,
+        maxRetries, delayMillis, maxDelayBase);
+  }
+  
   static class TryOnceThenFail implements RetryPolicy {
     @Override
     public RetryAction shouldRetry(Exception e, int retries, int failovers,
@@ -516,18 +523,25 @@ public class RetryPolicies {
     
     
     private RetryPolicy fallbackPolicy;
     private int maxFailovers;
+    private int maxRetries;
     private long delayMillis;
     private long maxDelayBase;
     
     public FailoverOnNetworkExceptionRetry(RetryPolicy fallbackPolicy,
         int maxFailovers) {
-      this(fallbackPolicy, maxFailovers, 0, 0);
+      this(fallbackPolicy, maxFailovers, 0, 0, 0);
     }
     
     public FailoverOnNetworkExceptionRetry(RetryPolicy fallbackPolicy,
         int maxFailovers, long delayMillis, long maxDelayBase) {
+      this(fallbackPolicy, maxFailovers, 0, delayMillis, maxDelayBase);
+    }
+    
+    public FailoverOnNetworkExceptionRetry(RetryPolicy fallbackPolicy,
+        int maxFailovers, int maxRetries, long delayMillis, long maxDelayBase) {
       this.fallbackPolicy = fallbackPolicy;
       this.maxFailovers = maxFailovers;
+      this.maxRetries = maxRetries;
       this.delayMillis = delayMillis;
       this.maxDelayBase = maxDelayBase;
     }
@@ -549,6 +563,10 @@ public class RetryPolicies {
             "failovers (" + failovers + ") exceeded maximum allowed ("
             "failovers (" + failovers + ") exceeded maximum allowed ("
             + maxFailovers + ")");
             + maxFailovers + ")");
       }
       }
+      if (retries - failovers > maxRetries) {
+        return new RetryAction(RetryAction.RetryDecision.FAIL, 0, "retries ("
+            + retries + ") exceeded maximum allowed (" + maxRetries + ")");
+      }
       
       
       if (e instanceof ConnectException ||
       if (e instanceof ConnectException ||
           e instanceof NoRouteToHostException ||
           e instanceof NoRouteToHostException ||

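Editor's note: a hedged sketch constructing the retry policy variant added above; the fallback policy and numeric limits are arbitrary example values.

    import org.apache.hadoop.io.retry.RetryPolicies;
    import org.apache.hadoop.io.retry.RetryPolicy;

    public class RetryPolicySketch {
      public static void main(String[] args) {
        RetryPolicy policy = RetryPolicies.failoverOnNetworkException(
            RetryPolicies.TRY_ONCE_THEN_FAIL, // fallback for non-network exceptions
            15,     // maxFailovers
            10,     // maxRetries, the new limit introduced by this change
            500L,   // delayMillis
            15000L  // maxDelayBase
        );
        // Per the new check in shouldRetry(), the policy fails once
        // (retries - failovers) exceeds maxRetries.
        System.out.println(policy);
      }
    }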
+ 8 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java

@@ -450,6 +450,14 @@ public abstract class Server {
     serviceAuthorizationManager.refresh(conf, provider);
   }
 
+  /**
+   * Refresh the service authorization ACL for the service handled by this server
+   * using the specified Configuration.
+   */
+  public void refreshServiceAclWithConfigration(Configuration conf,
+      PolicyProvider provider) {
+    serviceAuthorizationManager.refreshWithConfiguration(conf, provider);
+  }
   /**
    * Returns a handle to the serviceAuthorizationManager (required in tests)
    * @return instance of ServiceAuthorizationManager for this server

+ 50 - 134
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java

@@ -24,17 +24,15 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.net.SocketException;
-import java.nio.channels.AsynchronousCloseException;
 import java.nio.channels.ClosedChannelException;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.ByteBuffer;
-import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.lang.SystemUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.NativeCodeLoader;
+import org.apache.hadoop.util.CloseableReferenceCount;
 
 import com.google.common.annotations.VisibleForTesting;
 
 
@@ -132,104 +130,14 @@ public class DomainSocket implements Closeable {
   }
 
   /**
-   * Tracks the reference count of the file descriptor, and also whether it is
-   * open or closed.
+   * The socket reference count and closed bit.
    */
-  private static class Status {
-    /**
-     * Bit mask representing a closed domain socket. 
-     */
-    private static final int STATUS_CLOSED_MASK = 1 << 30;
-    
-    /**
-     * Status bits
-     * 
-     * Bit 30: 0 = DomainSocket open, 1 = DomainSocket closed
-     * Bits 29 to 0: the reference count.
-     */
-    private final AtomicInteger bits = new AtomicInteger(0);
-
-    Status() { }
-
-    /**
-     * Increment the reference count of the underlying file descriptor.
-     *
-     * @throws ClosedChannelException      If the file descriptor is closed.
-     */
-    void reference() throws ClosedChannelException {
-      int curBits = bits.incrementAndGet();
-      if ((curBits & STATUS_CLOSED_MASK) != 0) {
-        bits.decrementAndGet();
-        throw new ClosedChannelException();
-      }
-    }
-
-    /**
-     * Decrement the reference count of the underlying file descriptor.
-     *
-     * @param checkClosed        Whether to throw an exception if the file
-     *                           descriptor is closed.
-     *
-     * @throws AsynchronousCloseException  If the file descriptor is closed and
-     *                                     checkClosed is set.
-     */
-    void unreference(boolean checkClosed) throws AsynchronousCloseException {
-      int newCount = bits.decrementAndGet();
-      assert (newCount & ~STATUS_CLOSED_MASK) >= 0;
-      if (checkClosed && ((newCount & STATUS_CLOSED_MASK) != 0)) {
-        throw new AsynchronousCloseException();
-      }
-    }
-
-    /**
-     * Return true if the file descriptor is currently open.
-     * 
-     * @return                 True if the file descriptor is currently open.
-     */
-    boolean isOpen() {
-      return ((bits.get() & STATUS_CLOSED_MASK) == 0);
-    }
-
-    /**
-     * Mark the file descriptor as closed.
-     *
-     * Once the file descriptor is closed, it cannot be reopened.
-     *
-     * @return                         The current reference count.
-     * @throws ClosedChannelException  If someone else closes the file 
-     *                                 descriptor before we do.
-     */
-    int setClosed() throws ClosedChannelException {
-      while (true) {
-        int curBits = bits.get();
-        if ((curBits & STATUS_CLOSED_MASK) != 0) {
-          throw new ClosedChannelException();
-        }
-        if (bits.compareAndSet(curBits, curBits | STATUS_CLOSED_MASK)) {
-          return curBits & (~STATUS_CLOSED_MASK);
-        }
-      }
-    }
-
-    /**
-     * Get the current reference count.
-     *
-     * @return                 The current reference count.
-     */
-    int getReferenceCount() {
-      return bits.get() & (~STATUS_CLOSED_MASK);
-    }
-  }
-
-  /**
-   * The socket status.
-   */
-  private final Status status;
+  final CloseableReferenceCount refCount;
 
   /**
    * The file descriptor associated with this UNIX domain socket.
    */
-  private final int fd;
+  final int fd;
 
   /**
    * The path associated with this UNIX domain socket.
@@ -252,13 +160,21 @@ public class DomainSocket implements Closeable {
   private final DomainChannel channel = new DomainChannel();
 
   private DomainSocket(String path, int fd) {
-    this.status = new Status();
+    this.refCount = new CloseableReferenceCount();
     this.fd = fd;
     this.path = path;
   }
 
   private static native int bind0(String path) throws IOException;
 
+  private void unreference(boolean checkClosed) throws ClosedChannelException {
+    if (checkClosed) {
+      refCount.unreferenceCheckClosed();
+    } else {
+      refCount.unreference();
+    }
+  }
+
   /**
    * Create a new DomainSocket listening on the given path.
    *
@@ -308,14 +224,14 @@ public class DomainSocket implements Closeable {
    * @throws SocketTimeoutException       If the accept timed out.
    */
   public DomainSocket accept() throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       DomainSocket ret = new DomainSocket(path, accept0(fd));
       exc = false;
       return ret;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
 
@@ -335,14 +251,14 @@ public class DomainSocket implements Closeable {
     return new DomainSocket(path, fd);
   }
 
- /**
-  * Return true if the file descriptor is currently open.
-  *
-  * @return                 True if the file descriptor is currently open.
-  */
- public boolean isOpen() {
-   return status.isOpen();
- }
+  /**
+   * Return true if the file descriptor is currently open.
+   *
+   * @return                 True if the file descriptor is currently open.
+   */
+  public boolean isOpen() {
+    return refCount.isOpen();
+  }
 
   /**
    * @return                 The socket path.
@@ -381,20 +297,20 @@ public class DomainSocket implements Closeable {
       throws IOException;
 
   public void setAttribute(int type, int size) throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       setAttribute0(fd, type, size);
       exc = false;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
   private native int getAttribute0(int fd, int type) throws IOException;
 
   public int getAttribute(int type) throws IOException {
-    status.reference();
+    refCount.reference();
     int attribute;
     boolean exc = true;
     try {
@@ -402,7 +318,7 @@ public class DomainSocket implements Closeable {
       exc = false;
       return attribute;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
 
@@ -419,9 +335,9 @@ public class DomainSocket implements Closeable {
   @Override
   public void close() throws IOException {
     // Set the closed bit on this DomainSocket
-    int refCount;
+    int count;
     try {
-      refCount = status.setClosed();
+      count = refCount.setClosed();
     } catch (ClosedChannelException e) {
       // Someone else already closed the DomainSocket.
       return;
@@ -429,7 +345,7 @@ public class DomainSocket implements Closeable {
     // Wait for all references to go away
     boolean didShutdown = false;
     boolean interrupted = false;
-    while (refCount > 0) {
+    while (count > 0) {
       if (!didShutdown) {
         try {
           // Calling shutdown on the socket will interrupt blocking system
@@ -446,7 +362,7 @@ public class DomainSocket implements Closeable {
       } catch (InterruptedException e) {
         interrupted = true;
       }
-      refCount = status.getReferenceCount();
+      count = refCount.getReferenceCount();
     }
 
     // At this point, nobody has a reference to the file descriptor, 
@@ -478,13 +394,13 @@ public class DomainSocket implements Closeable {
    */
   public void sendFileDescriptors(FileDescriptor descriptors[],
       byte jbuf[], int offset, int length) throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       sendFileDescriptors0(fd, descriptors, jbuf, offset, length);
       exc = false;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
 
@@ -515,14 +431,14 @@ public class DomainSocket implements Closeable {
    */
   public int receiveFileDescriptors(FileDescriptor[] descriptors,
       byte jbuf[], int offset, int length) throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       int nBytes = receiveFileDescriptors0(fd, descriptors, jbuf, offset, length);
       exc = false;
       return nBytes;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
 
@@ -539,7 +455,7 @@ public class DomainSocket implements Closeable {
     for (int i = 0; i < streams.length; i++) {
       streams[i] = null;
     }
-    status.reference();
+    refCount.reference();
     try {
       int ret = receiveFileDescriptors0(fd, descriptors, buf, offset, length);
       for (int i = 0, j = 0; i < descriptors.length; i++) {
@@ -569,7 +485,7 @@ public class DomainSocket implements Closeable {
           }
         }
       }
-      status.unreference(!success);
+      unreference(!success);
     }
   }
 
 
@@ -593,7 +509,7 @@ public class DomainSocket implements Closeable {
   public class DomainInputStream extends InputStream {
     @Override
     public int read() throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         byte b[] = new byte[1];
@@ -601,33 +517,33 @@ public class DomainSocket implements Closeable {
         exc = false;
         return (ret >= 0) ? b[0] : -1;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
     
     @Override
     public int read(byte b[], int off, int len) throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         int nRead = DomainSocket.readArray0(DomainSocket.this.fd, b, off, len);
         exc = false;
         return nRead;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
 
     @Override
     public int available() throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         int nAvailable = DomainSocket.available0(DomainSocket.this.fd);
         exc = false;
         return nAvailable;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
 
 
@@ -649,7 +565,7 @@ public class DomainSocket implements Closeable {
 
 
     @Override
     public void write(int val) throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         byte b[] = new byte[1];
@@ -657,19 +573,19 @@ public class DomainSocket implements Closeable {
         DomainSocket.writeArray0(DomainSocket.this.fd, b, 0, 1);
         exc = false;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
 
     @Override
     public void write(byte[] b, int off, int len) throws IOException {
-      status.reference();
-        boolean exc = true;
+      refCount.reference();
+      boolean exc = true;
       try {
         DomainSocket.writeArray0(DomainSocket.this.fd, b, off, len);
         exc = false;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
   }
@@ -688,7 +604,7 @@ public class DomainSocket implements Closeable {
 
 
     @Override
     @Override
     public int read(ByteBuffer dst) throws IOException {
     public int read(ByteBuffer dst) throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       boolean exc = true;
       try {
       try {
         int nread = 0;
         int nread = 0;
@@ -710,7 +626,7 @@ public class DomainSocket implements Closeable {
         exc = false;
         exc = false;
         return nread;
         return nread;
       } finally {
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
       }
     }
     }
   }
   }

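The hunks above replace DomainSocket's old status field with a refCount guard (a CloseableReferenceCount, whose full source appears later in this change) and wrap every native I/O call in a reference/unreference pair. A minimal sketch of that pattern follows; the class and field names below are illustrative only and are not part of the patch.

    import java.io.IOException;
    import org.apache.hadoop.util.CloseableReferenceCount;

    // Illustrative sketch: a tiny resource guarded the same way the streams above are.
    class GuardedResourceSketch {
      private final CloseableReferenceCount refCount = new CloseableReferenceCount();

      int read() throws IOException {
        refCount.reference();      // fails with ClosedChannelException once closed
        boolean exc = true;
        try {
          int value = 0;           // stand-in for a native call such as readArray0()
          exc = false;
          return value;
        } finally {
          // DomainSocket#unreference(boolean) additionally releases the fd once the
          // count drains after close(); here we simply drop the reference.
          refCount.unreference();
        }
      }

      void close() throws IOException {
        refCount.setClosed();      // later reference() calls fail fast
      }
    }
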
+ 478 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java

@@ -0,0 +1,478 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.io.Closeable;
+import java.io.EOFException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.IOUtils;
+
+import java.io.IOException;
+import java.nio.channels.ClosedChannelException;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.TreeMap;
+import java.util.Map;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.NativeCodeLoader;
+
+import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.Uninterruptibles;
+
+/**
+ * The DomainSocketWatcher watches a set of domain sockets to see when they
+ * become readable, or closed.  When one of those events happens, it makes a
+ * callback.
+ *
+ * See {@link DomainSocket} for more information about UNIX domain sockets.
+ */
+@InterfaceAudience.LimitedPrivate("HDFS")
+public final class DomainSocketWatcher extends Thread implements Closeable {
+  static {
+    if (SystemUtils.IS_OS_WINDOWS) {
+      loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
+    } else if (!NativeCodeLoader.isNativeCodeLoaded()) {
+      loadingFailureReason = "libhadoop cannot be loaded.";
+    } else {
+      String problem;
+      try {
+        anchorNative();
+        problem = null;
+      } catch (Throwable t) {
+        problem = "DomainSocketWatcher#anchorNative got error: " +
+          t.getMessage();
+      }
+      loadingFailureReason = problem;
+    }
+  }
+
+  static Log LOG = LogFactory.getLog(DomainSocketWatcher.class);
+
+  /**
+   * The reason why DomainSocketWatcher is not available, or null if it is
+   * available.
+   */
+  private final static String loadingFailureReason;
+
+  /**
+   * Initializes the native library code.
+   */
+  private static native void anchorNative();
+
+  interface Handler {
+    /**
+     * Handles an event on a socket.  An event may be the socket becoming
+     * readable, or the remote end being closed.
+     *
+     * @param sock    The socket that the event occurred on.
+     * @return        Whether we should close the socket.
+     */
+    boolean handle(DomainSocket sock);
+  }
+
+  /**
+   * Handler for {DomainSocketWatcher#notificationSockets[1]}
+   */
+  private class NotificationHandler implements Handler {
+    public boolean handle(DomainSocket sock) {
+      try {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": NotificationHandler: doing a read on " +
+            sock.fd);
+        }
+        if (sock.getInputStream().read() == -1) {
+          if (LOG.isTraceEnabled()) {
+            LOG.trace(this + ": NotificationHandler: got EOF on " + sock.fd);
+          }
+          throw new EOFException();
+        }
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": NotificationHandler: read succeeded on " +
+            sock.fd);
+        }
+        return false;
+      } catch (IOException e) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": NotificationHandler: setting closed to " +
+              "true for " + sock.fd);
+        }
+        closed = true;
+        return true;
+      }
+    }
+  }
+
+  private static class Entry {
+    final DomainSocket socket;
+    final Handler handler;
+
+    Entry(DomainSocket socket, Handler handler) {
+      this.socket = socket;
+      this.handler = handler;
+    }
+
+    DomainSocket getDomainSocket() {
+      return socket;
+    }
+
+    Handler getHandler() {
+      return handler;
+    }
+  }
+
+  /**
+   * The FdSet is a set of file descriptors that gets passed to poll(2).
+   * It contains a native memory segment, so that we don't have to copy
+   * in the poll0 function.
+   */
+  private static class FdSet {
+    private long data;
+
+    private native static long alloc0();
+
+    FdSet() {
+      data = alloc0();
+    }
+
+    /**
+     * Add a file descriptor to the set.
+     *
+     * @param fd   The file descriptor to add.
+     */
+    native void add(int fd);
+
+    /**
+     * Remove a file descriptor from the set.
+     *
+     * @param fd   The file descriptor to remove.
+     */
+    native void remove(int fd);
+
+    /**
+     * Get an array containing all the FDs marked as readable.
+     * Also clear the state of all FDs.
+     *
+     * @return     An array containing all of the currently readable file
+     *             descriptors.
+     */
+    native int[] getAndClearReadableFds();
+
+    /**
+     * Close the object and de-allocate the memory used.
+     */
+    native void close();
+  }
+
+  /**
+   * Lock which protects toAdd, toRemove, and closed.
+   */
+  private final ReentrantLock lock = new ReentrantLock();
+
+  /**
+   * Condition variable which indicates that toAdd and toRemove have been
+   * processed.
+   */
+  private final Condition processedCond = lock.newCondition();
+
+  /**
+   * Entries to add.
+   */
+  private final LinkedList<Entry> toAdd =
+      new LinkedList<Entry>();
+
+  /**
+   * Entries to remove.
+   */
+  private final TreeMap<Integer, DomainSocket> toRemove =
+      new TreeMap<Integer, DomainSocket>();
+
+  /**
+   * Maximum length of time to go between checking whether the interrupted
+   * bit has been set for this thread.
+   */
+  private final int interruptCheckPeriodMs;
+
+  /**
+   * A pair of sockets used to wake up the thread after it has called poll(2).
+   */
+  private final DomainSocket notificationSockets[];
+
+  /**
+   * Whether or not this DomainSocketWatcher is closed.
+   */
+  private boolean closed = false;
+
+  public DomainSocketWatcher(int interruptCheckPeriodMs) throws IOException {
+    if (loadingFailureReason != null) {
+      throw new UnsupportedOperationException(loadingFailureReason);
+    }
+    notificationSockets = DomainSocket.socketpair();
+    this.interruptCheckPeriodMs = interruptCheckPeriodMs;
+    Preconditions.checkArgument(interruptCheckPeriodMs > 0);
+    watcherThread.start();
+  }
+
+  /**
+   * Close the DomainSocketWatcher and wait for its thread to terminate.
+   *
+   * If there is more than one close, all but the first will be ignored.
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      lock.lock();
+      if (closed) return;
+      LOG.info(this + ": closing");
+      closed = true;
+    } finally {
+      lock.unlock();
+    }
+    // Close notificationSockets[0], so that notificationSockets[1] gets an EOF
+    // event.  This will wake up the thread immediately if it is blocked inside
+    // the select() system call.
+    notificationSockets[0].close();
+    // Wait for the select thread to terminate.
+    Uninterruptibles.joinUninterruptibly(watcherThread);
+  }
+
+  /**
+   * Add a socket.
+   *
+   * @param sock     The socket to add.  It is an error to re-add a socket that
+   *                   we are already watching.
+   * @param handler  The handler to associate with this socket.  This may be
+   *                   called any time after this function is called.
+   */
+  public void add(DomainSocket sock, Handler handler) {
+    try {
+      lock.lock();
+      checkNotClosed();
+      Entry entry = new Entry(sock, handler);
+      try {
+        sock.refCount.reference();
+      } catch (ClosedChannelException e) {
+        Preconditions.checkArgument(false,
+            "tried to add a closed DomainSocket to " + this);
+      }
+      toAdd.add(entry);
+      kick();
+      while (true) {
+        try {
+          processedCond.await();
+        } catch (InterruptedException e) {
+          this.interrupt();
+        }
+        if (!toAdd.contains(entry)) {
+          break;
+        }
+        checkNotClosed();
+      }
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  /**
+   * Remove a socket.  Its handler will be called.
+   *
+   * @param sock     The socket to remove.
+   */
+  public void remove(DomainSocket sock) {
+    try {
+      lock.lock();
+      checkNotClosed();
+      toRemove.put(sock.fd, sock);
+      kick();
+      while (true) {
+        try {
+          processedCond.await();
+        } catch (InterruptedException e) {
+          this.interrupt();
+        }
+        if (!toRemove.containsKey(sock.fd)) {
+          break;
+        }
+        checkNotClosed();
+      }
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  /**
+   * Wake up the DomainSocketWatcher thread.
+   */
+  private void kick() {
+    try {
+      notificationSockets[0].getOutputStream().write(0);
+    } catch (IOException e) {
+      LOG.error(this + ": error writing to notificationSockets[0]", e);
+    }
+  }
+
+  /**
+   * Check that the DomainSocketWatcher is not closed.
+   * Must be called while holding the lock.
+   */
+  private void checkNotClosed() {
+    Preconditions.checkState(lock.isHeldByCurrentThread());
+    if (closed) {
+      throw new RuntimeException("DomainSocketWatcher is closed.");
+    }
+  }
+
+  private void sendCallback(String caller, TreeMap<Integer, Entry> entries,
+      FdSet fdSet, int fd) {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(this + ": " + caller + " starting sendCallback for fd " + fd);
+    }
+    Entry entry = entries.get(fd);
+    Preconditions.checkNotNull(entry,
+        this + ": fdSet contained " + fd + ", which we were " +
+        "not tracking.");
+    DomainSocket sock = entry.getDomainSocket();
+    if (entry.getHandler().handle(sock)) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(this + ": " + caller + ": closing fd " + fd +
+            " at the request of the handler.");
+      }
+      if (toRemove.remove(fd) != null) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": " + caller + " : sendCallback processed fd " +
+            fd  + " in toRemove.");
+        }
+      }
+      try {
+        sock.refCount.unreferenceCheckClosed();
+      } catch (IOException e) {
+        Preconditions.checkArgument(false,
+            this + ": file descriptor " + sock.fd + " was closed while " +
+            "still in the poll(2) loop.");
+      }
+      IOUtils.cleanup(LOG, sock);
+      entries.remove(fd);
+      fdSet.remove(fd);
+    } else {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(this + ": " + caller + ": sendCallback not " +
+            "closing fd " + fd);
+      }
+    }
+  }
+
+  private final Thread watcherThread = new Thread(new Runnable() {
+    @Override
+    public void run() {
+      LOG.info(this + ": starting with interruptCheckPeriodMs = " +
+          interruptCheckPeriodMs);
+      final TreeMap<Integer, Entry> entries = new TreeMap<Integer, Entry>();
+      FdSet fdSet = new FdSet();
+      addNotificationSocket(entries, fdSet);
+      try {
+        while (true) {
+          lock.lock();
+          try {
+            for (int fd : fdSet.getAndClearReadableFds()) {
+              sendCallback("getAndClearReadableFds", entries, fdSet, fd);
+            }
+            if (!(toAdd.isEmpty() && toRemove.isEmpty())) {
+              // Handle pending additions (before pending removes).
+              for (Iterator<Entry> iter = toAdd.iterator(); iter.hasNext(); ) {
+                Entry entry = iter.next();
+                DomainSocket sock = entry.getDomainSocket();
+                Entry prevEntry = entries.put(sock.fd, entry);
+                Preconditions.checkState(prevEntry == null,
+                    this + ": tried to watch a file descriptor that we " +
+                    "were already watching: " + sock);
+                if (LOG.isTraceEnabled()) {
+                  LOG.trace(this + ": adding fd " + sock.fd);
+                }
+                fdSet.add(sock.fd);
+                iter.remove();
+              }
+              // Handle pending removals
+              while (true) {
+                Map.Entry<Integer, DomainSocket> entry = toRemove.firstEntry();
+                if (entry == null) break;
+                sendCallback("handlePendingRemovals",
+                    entries, fdSet, entry.getValue().fd);
+              }
+              processedCond.signalAll();
+            }
+            // Check if the thread should terminate.  Doing this check now is
+            // easier than at the beginning of the loop, since we know toAdd and
+            // toRemove are now empty and processedCond has been notified if it
+            // needed to be.
+            if (closed) {
+              LOG.info(toString() + " thread terminating.");
+              return;
+            }
+            // Check if someone sent our thread an InterruptedException while we
+            // were waiting in poll().
+            if (Thread.interrupted()) {
+              throw new InterruptedException();
+            }
+          } finally {
+            lock.unlock();
+          }
+          doPoll0(interruptCheckPeriodMs, fdSet);
+        }
+      } catch (InterruptedException e) {
+        LOG.info(toString() + " terminating on InterruptedException");
+      } catch (IOException e) {
+        LOG.error(toString() + " terminating on IOException", e);
+      } finally {
+        for (Entry entry : entries.values()) {
+          sendCallback("close", entries, fdSet, entry.getDomainSocket().fd);
+        }
+        entries.clear();
+        fdSet.close();
+      }
+    }
+  });
+
+  private void addNotificationSocket(final TreeMap<Integer, Entry> entries,
+      FdSet fdSet) {
+    entries.put(notificationSockets[1].fd, 
+        new Entry(notificationSockets[1], new NotificationHandler()));
+    try {
+      notificationSockets[1].refCount.reference();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    fdSet.add(notificationSockets[1].fd);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(this + ": adding notificationSocket " +
+          notificationSockets[1].fd + ", connected to " +
+          notificationSockets[0].fd);
+    }
+  }
+
+  public String toString() {
+    return "DomainSocketWatcher(" + System.identityHashCode(this) + ")"; 
+  }
+
+  private static native int doPoll0(int maxWaitMs, FdSet readFds)
+      throws IOException;
+}
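
A minimal usage sketch for the class above: register one end of a socket pair with a handler and let the peer's close wake the watcher, mirroring the flow TestDomainSocketWatcher exercises later in this diff. The Handler interface is package-private in this version, so the sketch assumes code living in org.apache.hadoop.net.unix; nothing below is part of the patch.

    package org.apache.hadoop.net.unix;

    import java.util.concurrent.CountDownLatch;

    // Illustrative sketch only.
    public class DomainSocketWatcherSketch {
      public static void main(String[] args) throws Exception {
        DomainSocketWatcher watcher = new DomainSocketWatcher(60000); // check interrupts every 60s
        DomainSocket[] pair = DomainSocket.socketpair();
        final CountDownLatch fired = new CountDownLatch(1);
        watcher.add(pair[1], new DomainSocketWatcher.Handler() {
          @Override
          public boolean handle(DomainSocket sock) {
            fired.countDown();
            return true;               // ask the watcher to close the socket for us
          }
        });
        pair[0].close();               // EOF on pair[1] triggers the handler
        fired.await();
        watcher.close();               // joins the internal poll thread
      }
    }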

+ 11 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java

@@ -30,6 +30,8 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 
+import com.google.common.annotations.VisibleForTesting;
+
 @InterfaceAudience.Private
 public class ProxyUsers {
 
@@ -177,4 +179,13 @@ public class ProxyUsers {
       (list.contains("*"));
   }
 
+  @VisibleForTesting
+  public static Map<String, Collection<String>> getProxyGroups() {
+    return proxyGroups;
+  }
+
+  @VisibleForTesting
+  public static Map<String, Collection<String>> getProxyHosts() {
+    return proxyHosts;
+  }
 }

+ 20 - 9
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java

@@ -33,6 +33,8 @@ import org.apache.hadoop.security.KerberosInfo;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * An authorization manager which handles service-level authorization
  * for incoming service requests.
@@ -120,19 +122,23 @@ public class ServiceAuthorizationManager {
     // Make a copy of the original config, and load the policy file
     Configuration policyConf = new Configuration(conf);
     policyConf.addResource(policyFile);
-    
+    refreshWithConfiguration(policyConf, provider);
+  }
+
+  public synchronized void refreshWithConfiguration(Configuration conf,
+      PolicyProvider provider) {
     final Map<Class<?>, AccessControlList> newAcls =
-      new IdentityHashMap<Class<?>, AccessControlList>();
+        new IdentityHashMap<Class<?>, AccessControlList>();
 
     // Parse the config file
     Service[] services = provider.getServices();
     if (services != null) {
       for (Service service : services) {
-        AccessControlList acl = 
-          new AccessControlList(
-              policyConf.get(service.getServiceKey(), 
-                             AccessControlList.WILDCARD_ACL_VALUE)
-              );
+        AccessControlList acl =
+            new AccessControlList(
+                conf.get(service.getServiceKey(),
+                    AccessControlList.WILDCARD_ACL_VALUE)
+            );
         newAcls.put(service.getProtocol(), acl);
       }
     }
@@ -141,8 +147,13 @@ public class ServiceAuthorizationManager {
     protocolToAcl = newAcls;
   }
 
-  // Package-protected for use in tests.
-  Set<Class<?>> getProtocolsWithAcls() {
+  @VisibleForTesting
+  public Set<Class<?>> getProtocolsWithAcls() {
     return protocolToAcl.keySet();
   }
+
+  @VisibleForTesting
+  public AccessControlList getProtocolsAcls(Class<?> className) {
+    return protocolToAcl.get(className);
+  }
 }

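The hunk above splits refresh() so that ACLs can also be rebuilt directly from an already-populated Configuration via the new public refreshWithConfiguration(). A hedged sketch of that call path follows; SketchProtocol and the ACL key are made up for illustration and are not part of the patch.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.authorize.AccessControlList;
    import org.apache.hadoop.security.authorize.PolicyProvider;
    import org.apache.hadoop.security.authorize.Service;
    import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;

    // Illustrative sketch: refresh ACLs from an in-memory Configuration, no policy file involved.
    public class AuthRefreshSketch {
      interface SketchProtocol {}

      public static void main(String[] args) {
        PolicyProvider provider = new PolicyProvider() {
          @Override
          public Service[] getServices() {
            return new Service[] { new Service("sketch.protocol.acl", SketchProtocol.class) };
          }
        };
        Configuration policy = new Configuration(false);
        policy.set("sketch.protocol.acl", "alice,bob admins");   // users "alice,bob", group "admins"
        ServiceAuthorizationManager mgr = new ServiceAuthorizationManager();
        mgr.refreshWithConfiguration(policy, provider);
        AccessControlList acl = mgr.getProtocolsAcls(SketchProtocol.class);  // new @VisibleForTesting getter
        System.out.println(acl);
      }
    }
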
+ 2 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java

@@ -19,7 +19,6 @@
 package org.apache.hadoop.service;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 
 import org.apache.commons.logging.Log;
@@ -54,13 +53,13 @@ public class CompositeService extends AbstractService {
   }
 
   /**
-   * Get an unmodifiable list of services
+   * Get a cloned list of services
    * @return a list of child services at the time of invocation -
    * added services will not be picked up.
    */
   public List<Service> getServices() {
     synchronized (serviceList) {
-      return Collections.unmodifiableList(serviceList);
+      return new ArrayList<Service>(serviceList);
     }
   }
 

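The change above makes getServices() return a defensive copy instead of an unmodifiable view of the live list, so callers iterate a point-in-time snapshot that later addService() calls cannot disturb. A small sketch of the difference; the subclass is illustrative only (addService is protected in CompositeService).

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.service.CompositeService;
    import org.apache.hadoop.service.Service;

    // Illustrative sketch: the returned list is a snapshot, not a live view.
    public class ServicesSnapshotSketch extends CompositeService {
      public ServicesSnapshotSketch() {
        super("sketch");
      }

      public void demo() {
        addService(new CompositeService("child-1"));   // addService is protected, hence the subclass
        List<Service> snapshot = getServices();        // copy taken here
        addService(new CompositeService("child-2"));   // not visible in the snapshot above
        System.out.println(snapshot.size());           // prints 1
      }

      public static void main(String[] args) {
        ServicesSnapshotSketch s = new ServicesSnapshotSketch();
        s.init(new Configuration());
        s.demo();
      }
    }
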
+ 125 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java

@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.nio.channels.AsynchronousCloseException;
+import java.nio.channels.ClosedChannelException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A closeable object that maintains a reference count.
+ *
+ * Once the object is closed, attempting to take a new reference will throw
+ * ClosedChannelException.
+ */
+public class CloseableReferenceCount {
+  /**
+   * Bit mask representing a closed domain socket.
+   */
+  private static final int STATUS_CLOSED_MASK = 1 << 30;
+
+  /**
+   * The status bits.
+   *
+   * Bit 30: 0 = open, 1 = closed.
+   * Bits 29 to 0: the reference count.
+   */
+  private final AtomicInteger status = new AtomicInteger(0);
+
+  public CloseableReferenceCount() { }
+
+  /**
+   * Increment the reference count.
+   *
+   * @throws ClosedChannelException      If the status is closed.
+   */
+  public void reference() throws ClosedChannelException {
+    int curBits = status.incrementAndGet();
+    if ((curBits & STATUS_CLOSED_MASK) != 0) {
+      status.decrementAndGet();
+      throw new ClosedChannelException();
+    }
+  }
+
+  /**
+   * Decrement the reference count.
+   *
+   * @return          True if the object is closed and has no outstanding
+   *                  references.
+   */
+  public boolean unreference() {
+    int newVal = status.decrementAndGet();
+    Preconditions.checkState(newVal != 0xffffffff,
+        "called unreference when the reference count was already at 0.");
+    return newVal == STATUS_CLOSED_MASK;
+  }
+
+  /**
+   * Decrement the reference count, checking to make sure that the
+   * CloseableReferenceCount is not closed.
+   *
+   * @throws AsynchronousCloseException  If the status is closed.
+   */
+  public void unreferenceCheckClosed() throws ClosedChannelException {
+    int newVal = status.decrementAndGet();
+    if ((newVal & STATUS_CLOSED_MASK) != 0) {
+      throw new AsynchronousCloseException();
+    }
+  }
+
+  /**
+   * Return true if the status is currently open.
+   *
+   * @return                 True if the status is currently open.
+   */
+  public boolean isOpen() {
+    return ((status.get() & STATUS_CLOSED_MASK) == 0);
+  }
+
+  /**
+   * Mark the status as closed.
+   *
+   * Once the status is closed, it cannot be reopened.
+   *
+   * @return                         The current reference count.
+   * @throws ClosedChannelException  If someone else closes the object
+   *                                 before we do.
+   */
+  public int setClosed() throws ClosedChannelException {
+    while (true) {
+      int curBits = status.get();
+      if ((curBits & STATUS_CLOSED_MASK) != 0) {
+        throw new ClosedChannelException();
+      }
+      if (status.compareAndSet(curBits, curBits | STATUS_CLOSED_MASK)) {
+        return curBits & (~STATUS_CLOSED_MASK);
+      }
+    }
+  }
+
+  /**
+   * Get the current reference count.
+   *
+   * @return                 The current reference count.
+   */
+  public int getReferenceCount() {
+    return status.get() & (~STATUS_CLOSED_MASK);
+  }
+}
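
A short lifecycle sketch for the class above: bit 30 of the packed status word is the closed flag and the low 30 bits are the count, so reference() fails fast after setClosed() and unreference() reports when a closed object has fully drained. Nothing below is part of the patch.

    import java.nio.channels.ClosedChannelException;
    import org.apache.hadoop.util.CloseableReferenceCount;

    // Illustrative sketch only.
    public class RefCountSketch {
      public static void main(String[] args) throws Exception {
        CloseableReferenceCount count = new CloseableReferenceCount();
        count.reference();                              // count = 1
        count.reference();                              // count = 2
        System.out.println(count.getReferenceCount());  // 2
        count.setClosed();                              // no new references allowed from here on
        try {
          count.reference();
        } catch (ClosedChannelException expected) {
          System.out.println("closed, as expected");
        }
        count.unreference();                            // false: one reference still outstanding
        boolean last = count.unreference();             // true: closed and fully released
        System.out.println(last);                       // safe to free the underlying resource now
      }
    }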

+ 54 - 1
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c

@@ -18,6 +18,7 @@
 
 #include "org_apache_hadoop.h"
 #include "org_apache_hadoop_io_nativeio_NativeIO.h"
+#include "org_apache_hadoop_io_nativeio_NativeIO_POSIX.h"
 
 #ifdef UNIX
 #include <assert.h>
@@ -49,6 +50,10 @@
 #include "file_descriptor.h"
 #include "errno_enum.h"
 
+#define MMAP_PROT_READ org_apache_hadoop_io_nativeio_NativeIO_POSIX_MMAP_PROT_READ
+#define MMAP_PROT_WRITE org_apache_hadoop_io_nativeio_NativeIO_POSIX_MMAP_PROT_WRITE
+#define MMAP_PROT_EXEC org_apache_hadoop_io_nativeio_NativeIO_POSIX_MMAP_PROT_EXEC
+
 // the NativeIO$POSIX$Stat inner class and its constructor
 static jclass stat_clazz;
 static jmethodID stat_ctor;
@@ -661,6 +666,55 @@ cleanup:
 #endif
 }
 
+JNIEXPORT jlong JNICALL 
+Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_mmap(
+  JNIEnv *env, jclass clazz, jobject jfd, jint jprot,
+  jboolean jshared, jlong length)
+{
+#ifdef UNIX
+  void *addr = 0;
+  int prot, flags, fd;
+  
+  prot = ((jprot & MMAP_PROT_READ) ? PROT_READ : 0) |
+         ((jprot & MMAP_PROT_WRITE) ? PROT_WRITE : 0) |
+         ((jprot & MMAP_PROT_EXEC) ? PROT_EXEC : 0);
+  flags = (jshared == JNI_TRUE) ? MAP_SHARED : MAP_PRIVATE;
+  fd = fd_get(env, jfd);
+  addr = mmap(NULL, length, prot, flags, fd, 0);
+  if (addr == MAP_FAILED) {
+    throw_ioe(env, errno);
+  }
+  return (jlong)(intptr_t)addr;
+#endif  //   UNIX
+
+#ifdef WINDOWS
+  THROW(env, "java/io/IOException",
+    "The function POSIX.mmap() is not supported on Windows");
+  return NULL;
+#endif
+}
+
+JNIEXPORT void JNICALL 
+Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_munmap(
+  JNIEnv *env, jclass clazz, jlong jaddr, jlong length)
+{
+#ifdef UNIX
+  void *addr;
+
+  addr = (void*)(intptr_t)jaddr;
+  if (munmap(addr, length) < 0) {
+    throw_ioe(env, errno);
+  }
+#endif  //   UNIX
+
+#ifdef WINDOWS
+  THROW(env, "java/io/IOException",
+    "The function POSIX.munmap() is not supported on Windows");
+  return NULL;
+#endif
+}
+
+
 /*
  * static native String getGroupName(int gid);
  *
@@ -1012,4 +1066,3 @@ JNIEnv *env, jclass clazz)
 /**
  * vim: sw=2: ts=2: et:
  */
-

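These JNI bodies back Java-side wrappers on NativeIO.POSIX; only the native half appears in this excerpt, so the sketch below assumes the Java declarations long mmap(FileDescriptor, int, boolean, long), void munmap(long, long) and the MMAP_PROT_READ constant implied by the #defines above. Treat it as a hedged illustration, not the project's documented API usage.

    import java.io.FileInputStream;
    import java.io.IOException;
    import org.apache.hadoop.io.nativeio.NativeIO;

    // Illustrative sketch: map a file read-only, then unmap it again.
    public class MmapSketch {
      public static void main(String[] args) throws IOException {
        if (!NativeIO.isAvailable()) {
          return;                                     // libhadoop not loaded
        }
        FileInputStream in = new FileInputStream(args[0]);   // path to an existing file
        long length = in.getChannel().size();
        long addr = NativeIO.POSIX.mmap(in.getFD(), NativeIO.POSIX.MMAP_PROT_READ,
            false /* private mapping */, length);
        try {
          System.out.println("mapped at 0x" + Long.toHexString(addr));
        } finally {
          NativeIO.POSIX.munmap(addr, length);        // always release the mapping
          in.close();
        }
      }
    }
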
+ 162 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.c

@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "org_apache_hadoop.h"
+
+#ifdef UNIX
+
+#include "exception.h"
+#include "file_descriptor.h"
+#include "org_apache_hadoop.h"
+#include "org_apache_hadoop_io_nativeio_SharedFileDescriptorFactory.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static pthread_mutex_t g_rand_lock = PTHREAD_MUTEX_INITIALIZER;
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_io_nativeio_SharedFileDescriptorFactory_deleteStaleTemporaryFiles0(
+  JNIEnv *env, jclass clazz, jstring jprefix, jstring jpath)
+{
+  const char *prefix = NULL, *path = NULL;
+  char target[PATH_MAX];
+  jthrowable jthr;
+  DIR *dp = NULL;
+  struct dirent *de;
+
+  prefix = (*env)->GetStringUTFChars(env, jprefix, NULL);
+  if (!prefix) goto done; // exception raised
+  path = (*env)->GetStringUTFChars(env, jpath, NULL);
+  if (!path) goto done; // exception raised
+
+  dp = opendir(path);
+  if (!dp) {
+    int ret = errno;
+    jthr = newIOException(env, "opendir(%s) error %d: %s",
+                          path, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  while ((de = readdir(dp))) {
+    if (strncmp(prefix, de->d_name, strlen(prefix)) == 0) {
+      int ret = snprintf(target, PATH_MAX, "%s/%s", path, de->d_name);
+      if ((0 < ret) && (ret < PATH_MAX)) {
+        unlink(target);
+      }
+    }
+  }
+
+done:
+  if (dp) {
+    closedir(dp);
+  }
+  if (prefix) {
+    (*env)->ReleaseStringUTFChars(env, jprefix, prefix);
+  }
+  if (path) {
+    (*env)->ReleaseStringUTFChars(env, jpath, path);
+  }
+}
+
+JNIEXPORT jobject JNICALL
+Java_org_apache_hadoop_io_nativeio_SharedFileDescriptorFactory_createDescriptor0(
+  JNIEnv *env, jclass clazz, jstring jprefix, jstring jpath, jint length)
+{
+  const char *prefix = NULL, *path = NULL;
+  char target[PATH_MAX];
+  int ret, fd = -1, rnd;
+  jthrowable jthr;
+  jobject jret = NULL;
+
+  prefix = (*env)->GetStringUTFChars(env, jprefix, NULL);
+  if (!prefix) goto done; // exception raised
+  path = (*env)->GetStringUTFChars(env, jpath, NULL);
+  if (!path) goto done; // exception raised
+
+  pthread_mutex_lock(&g_rand_lock);
+  rnd = rand();
+  pthread_mutex_unlock(&g_rand_lock);
+  while (1) {
+    ret = snprintf(target, PATH_MAX, "%s/%s_%d",
+                   path, prefix, rnd);
+    if (ret < 0) {
+      jthr = newIOException(env, "snprintf error");
+      (*env)->Throw(env, jthr);
+      goto done;
+    } else if (ret >= PATH_MAX) {
+      jthr = newIOException(env, "computed path was too long.");
+      (*env)->Throw(env, jthr);
+      goto done;
+    }
+    fd = open(target, O_CREAT | O_EXCL | O_RDWR, 0700);
+    if (fd >= 0) break; // success
+    ret = errno;
+    if (ret == EEXIST) {
+      // Bad luck -- we got a very rare collision here between us and 
+      // another DataNode (or process).  Try again.
+      continue;
+    } else if (ret == EINTR) {
+      // Most of the time, this error is only possible when opening FIFOs.
+      // But let's be thorough.
+      continue;
+    }
+    jthr = newIOException(env, "open(%s, O_CREAT | O_EXCL | O_RDWR) "
+            "failed: error %d (%s)", target, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  if (unlink(target) < 0) {
+    jthr = newIOException(env, "unlink(%s) failed: error %d (%s)",
+                          path, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  if (ftruncate(fd, length) < 0) {
+    jthr = newIOException(env, "ftruncate(%s, %d) failed: error %d (%s)",
+                          path, length, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  jret = fd_create(env, fd); // throws exception on error.
+
+done:
+  if (prefix) {
+    (*env)->ReleaseStringUTFChars(env, jprefix, prefix);
+  }
+  if (path) {
+    (*env)->ReleaseStringUTFChars(env, jpath, path);
+  }
+  if (!jret) {
+    if (fd >= 0) {
+      close(fd);
+    }
+  }
+  return jret;
+}
+
+#endif
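
createDescriptor0() above opens a uniquely named file, immediately unlinks it, sizes it with ftruncate, and hands back the descriptor, giving callers an anonymous shared-memory segment. The Java-side flow, matching what TestSharedFileDescriptorFactory exercises later in this diff, looks roughly like the sketch below; the directory path is illustrative.

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory;

    // Illustrative sketch only.
    public class SharedFdSketch {
      public static void main(String[] args) throws Exception {
        File dir = new File("/tmp/shm-sketch");        // illustrative path
        dir.mkdirs();
        SharedFileDescriptorFactory factory =
            new SharedFileDescriptorFactory("sketch_", dir.getAbsolutePath());
        FileInputStream in = factory.createDescriptor(4096);   // 4 KB shared segment
        FileOutputStream out = new FileOutputStream(in.getFD());
        out.write(7);                                  // one side writes...
        in.getChannel().position(0);
        System.out.println(in.read());                 // ...and a reader sees 7
        out.close();
        in.close();
      }
    }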

+ 247 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocketWatcher.c

@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "exception.h"
+#include "org_apache_hadoop.h"
+#include "org_apache_hadoop_net_unix_DomainSocketWatcher.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <jni.h>
+#include <poll.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static jfieldID fd_set_data_fid;
+
+#define FD_SET_DATA_MIN_SIZE 2
+
+struct fd_set_data {
+  /**
+   * Number of fds we have allocated space for.
+   */
+  int alloc_size;
+
+  /**
+   * Number of fds actually in use.
+   */
+  int used_size;
+
+  /**
+   * Beginning of pollfd data.
+   */
+  struct pollfd pollfd[0];
+};
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_anchorNative(
+JNIEnv *env, jclass clazz)
+{
+  jclass fd_set_class;
+
+  fd_set_class = (*env)->FindClass(env,
+          "org/apache/hadoop/net/unix/DomainSocketWatcher$FdSet");
+  if (!fd_set_class) return; // exception raised
+  fd_set_data_fid = (*env)->GetFieldID(env, fd_set_class, "data", "J");
+  if (!fd_set_data_fid) return; // exception raised
+}
+
+JNIEXPORT jlong JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_alloc0(
+JNIEnv *env, jclass clazz)
+{
+  struct fd_set_data *sd;
+
+  sd = calloc(1, sizeof(struct fd_set_data) +
+              (sizeof(struct pollfd) * FD_SET_DATA_MIN_SIZE));
+  if (!sd) {
+    (*env)->Throw(env, newRuntimeException(env, "out of memory allocating "
+                                            "DomainSocketWatcher#FdSet"));
+    return 0L;
+  }
+  sd->alloc_size = FD_SET_DATA_MIN_SIZE;
+  sd->used_size = 0;
+  return (jlong)(intptr_t)sd;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_add(
+JNIEnv *env, jobject obj, jint fd)
+{
+  struct fd_set_data *sd, *nd;
+  struct pollfd *pollfd;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+    GetLongField(env, obj, fd_set_data_fid);
+  if (sd->used_size + 1 > sd->alloc_size) {
+    nd = realloc(sd, sizeof(struct fd_set_data) +
+            (sizeof(struct pollfd) * sd->alloc_size * 2));
+    if (!nd) {
+      (*env)->Throw(env, newRuntimeException(env, "out of memory adding "
+            "another fd to DomainSocketWatcher#FdSet.  we have %d already",
+            sd->alloc_size));
+      return;
+    }
+    nd->alloc_size = nd->alloc_size * 2;
+    (*env)->SetLongField(env, obj, fd_set_data_fid, (jlong)(intptr_t)nd);
+    sd = nd;
+  }
+  pollfd = &sd->pollfd[sd->used_size];
+  sd->used_size++;
+  pollfd->fd = fd;
+  pollfd->events = POLLIN;
+  pollfd->revents = 0;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_remove(
+JNIEnv *env, jobject obj, jint fd)
+{
+  struct fd_set_data *sd;
+  struct pollfd *pollfd, *last_pollfd;
+  int used_size, i;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, obj, fd_set_data_fid);
+  used_size = sd->used_size;
+  for (i = 0; i < used_size; i++) {
+    pollfd = sd->pollfd + i;
+    if (pollfd->fd == fd) break;
+  }
+  if (i == used_size) {
+    (*env)->Throw(env, newRuntimeException(env, "failed to remove fd %d "
+          "from the FdSet because it was never present.", fd));
+    return;
+  }
+  last_pollfd = sd->pollfd + (used_size - 1);
+  if (used_size > 1) {
+    // Move last pollfd to the new empty slot if needed
+    pollfd->fd = last_pollfd->fd;
+    pollfd->events = last_pollfd->events;
+    pollfd->revents = last_pollfd->revents;
+  }
+  memset(last_pollfd, 0, sizeof(struct pollfd));
+  sd->used_size--;
+}
+
+JNIEXPORT jobject JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_getAndClearReadableFds(
+JNIEnv *env, jobject obj)
+{
+  int *carr = NULL;
+  jobject jarr = NULL;
+  struct fd_set_data *sd;
+  int used_size, num_readable = 0, i, j;
+  jthrowable jthr = NULL;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, obj, fd_set_data_fid);
+  used_size = sd->used_size;
+  for (i = 0; i < used_size; i++) {
+    if (sd->pollfd[i].revents & POLLIN) {
+      num_readable++;
+    } else {
+      sd->pollfd[i].revents = 0;
+    }
+  }
+  if (num_readable > 0) {
+    carr = malloc(sizeof(int) * num_readable);
+    if (!carr) {
+      jthr = newRuntimeException(env, "failed to allocate a temporary array "
+            "of %d ints", num_readable);
+      goto done;
+    }
+    j = 0;
+    for (i = 0; ((i < used_size) && (j < num_readable)); i++) {
+      if (sd->pollfd[i].revents & POLLIN) {
+        carr[j] = sd->pollfd[i].fd;
+        j++;
+        sd->pollfd[i].revents = 0;
+      }
+    }
+    if (j != num_readable) {
+      jthr = newRuntimeException(env, "failed to fill entire carr "
+            "array of size %d: only filled %d elements", num_readable, j);
+      goto done;
+    }
+  }
+  jarr = (*env)->NewIntArray(env, num_readable);
+  if (!jarr) {
+    jthr = (*env)->ExceptionOccurred(env);
+    (*env)->ExceptionClear(env);
+    goto done;
+  }
+  if (num_readable > 0) {
+    (*env)->SetIntArrayRegion(env, jarr, 0, num_readable, carr);
+    jthr = (*env)->ExceptionOccurred(env);
+    if (jthr) {
+      (*env)->ExceptionClear(env);
+      goto done;
+    }
+  }
+
+done:
+  free(carr);
+  if (jthr) {
+    (*env)->DeleteLocalRef(env, jarr);
+    jarr = NULL;
+  }
+  return jarr;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_close(
+JNIEnv *env, jobject obj)
+{
+  struct fd_set_data *sd;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, obj, fd_set_data_fid);
+  if (sd) {
+    free(sd);
+    (*env)->SetLongField(env, obj, fd_set_data_fid, 0L);
+  }
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_doPoll0(
+JNIEnv *env, jclass clazz, jint checkMs, jobject fdSet)
+{
+  struct fd_set_data *sd;
+  int ret, err;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, fdSet, fd_set_data_fid);
+  ret = poll(sd->pollfd, sd->used_size, checkMs);
+  if (ret >= 0) {
+    return ret;
+  }
+  err = errno;
+  if (err != EINTR) { // treat EINTR as 0 fds ready
+    (*env)->Throw(env, newIOException(env,
+            "poll(2) failed with error code %d: %s", err, terror(err)));
+  }
+  return 0;
+}

+ 2 - 1
hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm

@@ -352,7 +352,8 @@ Configuration for <<<conf/core-site.xml>>>
 | | | This value is deprecated. Use dfs.http.policy |
 *-------------------------+-------------------------+------------------------+
 | <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
-| | | HTTPS_ONLY turns off http access |
+| | | HTTPS_ONLY turns off http access. This option takes precedence over |
+| | | the deprecated configuration dfs.https.enable and hadoop.ssl.enabled. |
 *-------------------------+-------------------------+------------------------+
 | <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
 *-------------------------+-------------------------+------------------------+

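The documentation row above now states that dfs.http.policy takes precedence over the deprecated dfs.https.enable and hadoop.ssl.enabled switches. A trivial sketch of setting the preferred key, with values taken from the table above:

    import org.apache.hadoop.conf.Configuration;

    // Illustrative only: set the new key; per the table above it wins over the deprecated flags.
    public class HttpPolicySketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dfs.http.policy", "HTTPS_ONLY");   // HTTP_ONLY | HTTPS_ONLY | HTTP_AND_HTTPS
        System.out.println(conf.get("dfs.http.policy"));
      }
    }
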
+ 7 - 7
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java

@@ -41,7 +41,7 @@ public class TestKeyShell {
   
   @Test
   public void testKeySuccessfulKeyLifecycle() throws Exception {
-    outContent.flush();
+    outContent.reset();
     String[] args1 = {"create", "key1", "--provider", 
         "jceks://file" + tmpDir + "/keystore.jceks"};
     int rc = 0;
@@ -52,14 +52,14 @@ public class TestKeyShell {
     assertTrue(outContent.toString().contains("key1 has been successfully " +
     		"created."));
 
-    outContent.flush();
+    outContent.reset();
     String[] args2 = {"list", "--provider", 
         "jceks://file" + tmpDir + "/keystore.jceks"};
     rc = ks.run(args2);
     assertEquals(0, rc);
     assertTrue(outContent.toString().contains("key1"));
 
-    outContent.flush();
+    outContent.reset();
     String[] args3 = {"roll", "key1", "--provider", 
         "jceks://file" + tmpDir + "/keystore.jceks"};
     rc = ks.run(args3);
@@ -67,7 +67,7 @@ public class TestKeyShell {
     assertTrue(outContent.toString().contains("key1 has been successfully " +
     		"rolled."));
 
-    outContent.flush();
+    outContent.reset();
     String[] args4 = {"delete", "key1", "--provider", 
         "jceks://file" + tmpDir + "/keystore.jceks"};
     rc = ks.run(args4);
@@ -75,12 +75,12 @@ public class TestKeyShell {
     assertTrue(outContent.toString().contains("key1 has been successfully " +
     		"deleted."));
 
-    outContent.flush();
+    outContent.reset();
     String[] args5 = {"list", "--provider", 
         "jceks://file" + tmpDir + "/keystore.jceks"};
     rc = ks.run(args5);
     assertEquals(0, rc);
-    assertTrue(outContent.toString().contains("key1"));
+    assertFalse(outContent.toString(), outContent.toString().contains("key1"));
   }
   
   @Test
@@ -165,7 +165,7 @@ public class TestKeyShell {
     assertTrue(outContent.toString().contains("key1 has been successfully " +
     		"created."));
 
-    outContent.flush();
+    outContent.reset();
     String[] args2 = {"delete", "key1", "--provider", 
         "jceks://file" + tmpDir + "/keystore.jceks"};
     rc = ks.run(args2);

+ 82 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestSharedFileDescriptorFactory.java

@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.nativeio;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+
+public class TestSharedFileDescriptorFactory {
+  static final Log LOG = LogFactory.getLog(TestSharedFileDescriptorFactory.class);
+
+  private static final File TEST_BASE =
+      new File(System.getProperty("test.build.data", "/tmp"));
+
+  @Test(timeout=10000)
+  public void testReadAndWrite() throws Exception {
+    Assume.assumeTrue(NativeIO.isAvailable());
+    Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
+    File path = new File(TEST_BASE, "testReadAndWrite");
+    path.mkdirs();
+    SharedFileDescriptorFactory factory =
+        new SharedFileDescriptorFactory("woot_", path.getAbsolutePath());
+    FileInputStream inStream = factory.createDescriptor(4096);
+    FileOutputStream outStream = new FileOutputStream(inStream.getFD());
+    outStream.write(101);
+    inStream.getChannel().position(0);
+    Assert.assertEquals(101, inStream.read());
+    inStream.close();
+    outStream.close();
+    FileUtil.fullyDelete(path);
+  }
+
+  static private void createTempFile(String path) throws Exception {
+    FileOutputStream fos = new FileOutputStream(path);
+    fos.write(101);
+    fos.close();
+  }
+  
+  @Test(timeout=10000)
+  public void testCleanupRemainders() throws Exception {
+    Assume.assumeTrue(NativeIO.isAvailable());
+    Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
+    File path = new File(TEST_BASE, "testCleanupRemainders");
+    path.mkdirs();
+    String remainder1 = path.getAbsolutePath() + 
+        Path.SEPARATOR + "woot2_remainder1";
+    String remainder2 = path.getAbsolutePath() +
+        Path.SEPARATOR + "woot2_remainder2";
+    createTempFile(remainder1);
+    createTempFile(remainder2);
+    new SharedFileDescriptorFactory("woot2_", path.getAbsolutePath());
+    // creating the SharedFileDescriptorFactory should have removed 
+    // the remainders
+    Assert.assertFalse(new File(remainder1).exists());
+    Assert.assertFalse(new File(remainder2).exists());
+    FileUtil.fullyDelete(path);
+  }
+}

+ 150 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java

@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.util.ArrayList;
+import java.util.Random;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.util.concurrent.Uninterruptibles;
+
+public class TestDomainSocketWatcher {
+  static final Log LOG = LogFactory.getLog(TestDomainSocketWatcher.class);
+
+  @Before
+  public void before() {
+    Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null);
+  }
+
+  /**
+   * Test that we can create a DomainSocketWatcher and then shut it down.
+   */
+  @Test(timeout=60000)
+  public void testCreateShutdown() throws Exception {
+    DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
+    watcher.close();
+  }
+
+  /**
+   * Test that we can get notifications out a DomainSocketWatcher.
+   */
+  @Test(timeout=180000)
+  public void testDeliverNotifications() throws Exception {
+    DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
+    DomainSocket pair[] = DomainSocket.socketpair();
+    final CountDownLatch latch = new CountDownLatch(1);
+    watcher.add(pair[1], new DomainSocketWatcher.Handler() {
+      @Override
+      public boolean handle(DomainSocket sock) {
+        latch.countDown();
+        return true;
+      }
+    });
+    pair[0].close();
+    latch.await();
+    watcher.close();
+  }
+
+  /**
+   * Test that a java interruption can stop the watcher thread
+   */
+  @Test(timeout=60000)
+  public void testInterruption() throws Exception {
+    DomainSocketWatcher watcher = new DomainSocketWatcher(10);
+    watcher.interrupt();
+    Uninterruptibles.joinUninterruptibly(watcher);
+  }
+  
+  @Test(timeout=300000)
+  public void testStress() throws Exception {
+    final int SOCKET_NUM = 250;
+    final ReentrantLock lock = new ReentrantLock();
+    final DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
+    final ArrayList<DomainSocket[]> pairs = new ArrayList<DomainSocket[]>();
+    final AtomicInteger handled = new AtomicInteger(0);
+
+    final Thread adderThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        try {
+          for (int i = 0; i < SOCKET_NUM; i++) {
+            DomainSocket pair[] = DomainSocket.socketpair();
+            watcher.add(pair[1], new DomainSocketWatcher.Handler() {
+              @Override
+              public boolean handle(DomainSocket sock) {
+                handled.incrementAndGet();
+                return true;
+              }
+            });
+            lock.lock();
+            try {
+              pairs.add(pair);
+            } finally {
+              lock.unlock();
+            }
+          }
+        } catch (Throwable e) {
+          LOG.error(e);
+          throw new RuntimeException(e);
+        }
+      }
+    });
+    
+    final Thread removerThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        final Random random = new Random();
+        try {
+          while (handled.get() != SOCKET_NUM) {
+            lock.lock();
+            try {
+              if (!pairs.isEmpty()) {
+                int idx = random.nextInt(pairs.size());
+                DomainSocket pair[] = pairs.remove(idx);
+                if (random.nextBoolean()) {
+                  pair[0].close();
+                } else {
+                  watcher.remove(pair[1]);
+                }
+              }
+            } finally {
+              lock.unlock();
+            }
+          }
+        } catch (Throwable e) {
+          LOG.error(e);
+          throw new RuntimeException(e);
+        }
+      }
+    });
+
+    adderThread.start();
+    removerThread.start();
+    Uninterruptibles.joinUninterruptibly(adderThread);
+    Uninterruptibles.joinUninterruptibly(removerThread);
+    watcher.close();
+  }
+}

+ 210 - 18
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestCompositeService.java → hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/TestCompositeService.java

@@ -16,26 +16,20 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.yarn.util;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+package org.apache.hadoop.service;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.service.AbstractService;
-import org.apache.hadoop.service.BreakableService;
-import org.apache.hadoop.service.CompositeService;
-import org.apache.hadoop.service.Service;
-import org.apache.hadoop.service.ServiceStateException;
 import org.apache.hadoop.service.Service.STATE;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.junit.Before;
 import org.junit.Test;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
 public class TestCompositeService {
 
   private static final int NUM_OF_SERVICES = 5;
@@ -156,7 +150,7 @@ public class TestCompositeService {
     try {
       serviceManager.start();
       fail("Exception should have been thrown due to startup failure of last service");
-    } catch (YarnRuntimeException e) {
+    } catch (ServiceTestRuntimeException e) {
       for (int i = 0; i < NUM_OF_SERVICES - 1; i++) {
         if (i >= FAILED_SERVICE_SEQ_NUMBER && STOP_ONLY_STARTED_SERVICES) {
           // Failed service state should be INITED
@@ -197,7 +191,7 @@ public class TestCompositeService {
     // Stop the composite service
     try {
       serviceManager.stop();
-    } catch (YarnRuntimeException e) {
+    } catch (ServiceTestRuntimeException e) {
     }
     assertInState(STATE.STOPPED, services);
   }
@@ -335,7 +329,41 @@ public class TestCompositeService {
 
     testService.init(new Configuration());
     assertEquals("Incorrect number of services",
-        1, testService.getServices().size());
+                 1, testService.getServices().size());
+  }
+
+  @Test(timeout = 1000)
+  public void testAddInitedSiblingInInit() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService sibling = new BreakableService();
+    sibling.init(new Configuration());
+    parent.addService(new AddSiblingService(parent,
+                                            sibling,
+                                            STATE.INITED));
+    parent.init(new Configuration());
+    parent.start();
+    parent.stop();
+    assertEquals("Incorrect number of services",
+                 2, parent.getServices().size());
+  }
+
+  @Test(timeout = 1000)
+  public void testAddUninitedSiblingInInit() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService sibling = new BreakableService();
+    parent.addService(new AddSiblingService(parent,
+                                            sibling,
+                                            STATE.INITED));
+    parent.init(new Configuration());
+    try {
+      parent.start();
+      fail("Expected an exception, got " + parent);
+    } catch (ServiceStateException e) {
+      //expected
+    }
+    parent.stop();
+    assertEquals("Incorrect number of services",
+                 2, parent.getServices().size());
   }
 
   @Test
@@ -365,6 +393,105 @@ public class TestCompositeService {
         2, testService.getServices().size());
   }
 
+  @Test(timeout = 1000)
+  public void testAddStartedChildBeforeInit() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService child = new BreakableService();
+    child.init(new Configuration());
+    child.start();
+    AddSiblingService.addChildToService(parent, child);
+    try {
+      parent.init(new Configuration());
+      fail("Expected an exception, got " + parent);
+    } catch (ServiceStateException e) {
+      //expected
+    }
+    parent.stop();
+  }
+
+  @Test(timeout = 1000)
+  public void testAddStoppedChildBeforeInit() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService child = new BreakableService();
+    child.init(new Configuration());
+    child.start();
+    child.stop();
+    AddSiblingService.addChildToService(parent, child);
+    try {
+      parent.init(new Configuration());
+      fail("Expected an exception, got " + parent);
+    } catch (ServiceStateException e) {
+      //expected
+    }
+    parent.stop();
+  }
+
+  @Test(timeout = 1000)
+  public void testAddStartedSiblingInStart() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService sibling = new BreakableService();
+    sibling.init(new Configuration());
+    sibling.start();
+    parent.addService(new AddSiblingService(parent,
+                                            sibling,
+                                            STATE.STARTED));
+    parent.init(new Configuration());
+    parent.start();
+    parent.stop();
+    assertEquals("Incorrect number of services",
+                 2, parent.getServices().size());
+  }
+
+  @Test(timeout = 1000)
+  public void testAddUninitedSiblingInStart() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService sibling = new BreakableService();
+    parent.addService(new AddSiblingService(parent,
+                                            sibling,
+                                            STATE.STARTED));
+    parent.init(new Configuration());
+    assertInState(STATE.NOTINITED, sibling);
+    parent.start();
+    parent.stop();
+    assertEquals("Incorrect number of services",
+                 2, parent.getServices().size());
+  }
+
+  @Test(timeout = 1000)
+  public void testAddStartedSiblingInInit() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService sibling = new BreakableService();
+    sibling.init(new Configuration());
+    sibling.start();
+    parent.addService(new AddSiblingService(parent,
+                                            sibling,
+                                            STATE.INITED));
+    parent.init(new Configuration());
+    assertInState(STATE.STARTED, sibling);
+    parent.start();
+    assertInState(STATE.STARTED, sibling);
+    parent.stop();
+    assertEquals("Incorrect number of services",
+                 2, parent.getServices().size());
+    assertInState(STATE.STOPPED, sibling);
+  }
+
+  @Test(timeout = 1000)
+  public void testAddStartedSiblingInStop() throws Throwable {
+    CompositeService parent = new CompositeService("parent");
+    BreakableService sibling = new BreakableService();
+    sibling.init(new Configuration());
+    sibling.start();
+    parent.addService(new AddSiblingService(parent,
+                                            sibling,
+                                            STATE.STOPPED));
+    parent.init(new Configuration());
+    parent.start();
+    parent.stop();
+    assertEquals("Incorrect number of services",
+                 2, parent.getServices().size());
+  }
+
   public static class CompositeServiceAddingAChild extends CompositeService{
   public static class CompositeServiceAddingAChild extends CompositeService{
     Service child;
     Service child;
 
 
@@ -379,7 +506,18 @@ public class TestCompositeService {
       super.serviceInit(conf);
       super.serviceInit(conf);
     }
     }
   }
   }
-  
+
+  public static class ServiceTestRuntimeException extends RuntimeException {
+    public ServiceTestRuntimeException(String message) {
+      super(message);
+    }
+  }
+
+  /**
+   * This is a composite service that keeps a count of the number of lifecycle
+   * events called, and can be set to throw a {@link ServiceTestRuntimeException}
+   * during service start or stop.
+   */
   public static class CompositeServiceImpl extends CompositeService {
   public static class CompositeServiceImpl extends CompositeService {
 
 
     public static boolean isPolicyToStopOnlyStartedServices() {
     public static boolean isPolicyToStopOnlyStartedServices() {
@@ -408,7 +546,7 @@ public class TestCompositeService {
     @Override
     @Override
     protected void serviceStart() throws Exception {
     protected void serviceStart() throws Exception {
       if (throwExceptionOnStart) {
       if (throwExceptionOnStart) {
-        throw new YarnRuntimeException("Fake service start exception");
+        throw new ServiceTestRuntimeException("Fake service start exception");
       }
       }
       counter++;
       counter++;
       callSequenceNumber = counter;
       callSequenceNumber = counter;
@@ -420,7 +558,7 @@ public class TestCompositeService {
       counter++;
       counter++;
       callSequenceNumber = counter;
       callSequenceNumber = counter;
       if (throwExceptionOnStop) {
       if (throwExceptionOnStop) {
-        throw new YarnRuntimeException("Fake service stop exception");
+        throw new ServiceTestRuntimeException("Fake service stop exception");
       }
       }
       super.serviceStop();
       super.serviceStop();
     }
     }
@@ -457,6 +595,9 @@ public class TestCompositeService {
 
 
   }
   }
 
 
+  /**
+   * Composite service that makes the addService method public to all
+   */
   public static class ServiceManager extends CompositeService {
   public static class ServiceManager extends CompositeService {
 
 
     public void addTestService(CompositeService service) {
     public void addTestService(CompositeService service) {
@@ -468,4 +609,55 @@ public class TestCompositeService {
     }
     }
   }
   }
 
 
+  public static class AddSiblingService extends CompositeService {
+    private final CompositeService parent;
+    private final Service serviceToAdd;
+    private STATE triggerState;
+
+    public AddSiblingService(CompositeService parent,
+                             Service serviceToAdd,
+                             STATE triggerState) {
+      super("ParentStateManipulatorService");
+      this.parent = parent;
+      this.serviceToAdd = serviceToAdd;
+      this.triggerState = triggerState;
+    }
+
+    /**
+     * Add the serviceToAdd to the parent if this service
+     * is in the state requested
+     */
+    private void maybeAddSibling() {
+      if (getServiceState() == triggerState) {
+        parent.addService(serviceToAdd);
+      }
+    }
+
+    @Override
+    protected void serviceInit(Configuration conf) throws Exception {
+      maybeAddSibling();
+      super.serviceInit(conf);
+    }
+
+    @Override
+    protected void serviceStart() throws Exception {
+      maybeAddSibling();
+      super.serviceStart();
+    }
+
+    @Override
+    protected void serviceStop() throws Exception {
+      maybeAddSibling();
+      super.serviceStop();
+    }
+
+    /**
+     * Expose addService method
+     * @param parent parent service
+     * @param child child to add
+     */
+    public static void addChildToService(CompositeService parent, Service child) {
+      parent.addService(child);
+    }
+  }
 }
 }
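
The new tests above pin down when a CompositeService may gain children: a child added
before or during serviceInit follows the parent through init/start/stop, while a child
that is already started cannot be added before the parent's init. A minimal sketch of
the common case (class and service names here are illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.service.AbstractService;
    import org.apache.hadoop.service.CompositeService;

    public class ParentWithChild extends CompositeService {
      public ParentWithChild() {
        super("ParentWithChild");
      }

      @Override
      protected void serviceInit(Configuration conf) throws Exception {
        // A child added here is inited, started and stopped with the parent.
        addService(new AbstractService("child") { });
        super.serviceInit(conf);
      }

      public static void main(String[] args) {
        ParentWithChild parent = new ParentWithChild();
        parent.init(new Configuration());
        parent.start();
        parent.stop();
        System.out.println("children: " + parent.getServices().size()); // 1
      }
    }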

+ 30 - 22
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/IdUserGroup.java

@@ -50,14 +50,6 @@ public class IdUserGroup {
   private BiMap<Integer, String> gidNameMap = HashBiMap.create();
   private BiMap<Integer, String> gidNameMap = HashBiMap.create();
 
 
   private long lastUpdateTime = 0; // Last time maps were updated
   private long lastUpdateTime = 0; // Last time maps were updated
-
-  static public class DuplicateNameOrIdException extends IOException {
-    private static final long serialVersionUID = 1L;
-
-    public DuplicateNameOrIdException(String msg) {
-      super(msg);
-    }
-  }
   
   
   public IdUserGroup() throws IOException {
   public IdUserGroup() throws IOException {
     updateMaps();
     updateMaps();
@@ -80,7 +72,8 @@ public class IdUserGroup {
     }
     }
   }
   }
 
 
-  private static final String DUPLICATE_NAME_ID_DEBUG_INFO = "NFS gateway can't start with duplicate name or id on the host system.\n"
+  private static final String DUPLICATE_NAME_ID_DEBUG_INFO =
+      "NFS gateway could have problems starting with duplicate name or id on the host system.\n"
       + "This is because HDFS (non-kerberos cluster) uses name as the only way to identify a user or group.\n"
       + "This is because HDFS (non-kerberos cluster) uses name as the only way to identify a user or group.\n"
       + "The host system with duplicated user/group name or id might work fine most of the time by itself.\n"
       + "The host system with duplicated user/group name or id might work fine most of the time by itself.\n"
       + "However when NFS gateway talks to HDFS, HDFS accepts only user and group name.\n"
       + "However when NFS gateway talks to HDFS, HDFS accepts only user and group name.\n"
@@ -88,6 +81,16 @@ public class IdUserGroup {
       + "<getent passwd | cut -d: -f1,3> and <getent group | cut -d: -f1,3> on Linux systms,\n"
       + "<getent passwd | cut -d: -f1,3> and <getent group | cut -d: -f1,3> on Linux systms,\n"
       + "<dscl . -list /Users UniqueID> and <dscl . -list /Groups PrimaryGroupID> on MacOS.";
       + "<dscl . -list /Users UniqueID> and <dscl . -list /Groups PrimaryGroupID> on MacOS.";
   
   
+  private static void reportDuplicateEntry(final String header,
+      final Integer key, final String value,
+      final Integer ekey, final String evalue) {    
+      LOG.warn("\n" + header + String.format(
+          "new entry (%d, %s), existing entry: (%d, %s).\n%s\n%s",
+          key, value, ekey, evalue,
+          "The new entry is to be ignored for the following reason.",
+          DUPLICATE_NAME_ID_DEBUG_INFO));
+  }
+      
   /**
   /**
    * Get the whole list of users and groups and save them in the maps.
    * Get the whole list of users and groups and save them in the maps.
    * @throws IOException 
    * @throws IOException 
@@ -108,22 +111,27 @@ public class IdUserGroup {
         }
         }
         LOG.debug("add to " + mapName + "map:" + nameId[0] + " id:" + nameId[1]);
         LOG.debug("add to " + mapName + "map:" + nameId[0] + " id:" + nameId[1]);
         // HDFS can't differentiate duplicate names with simple authentication
         // HDFS can't differentiate duplicate names with simple authentication
-        Integer key = Integer.valueOf(nameId[1]);
-        String value = nameId[0];
+        final Integer key = Integer.valueOf(nameId[1]);
+        final String value = nameId[0];        
         if (map.containsKey(key)) {
         if (map.containsKey(key)) {
-          LOG.error(String.format(
-              "Got duplicate id:(%d, %s), existing entry: (%d, %s).\n%s", key,
-              value, key, map.get(key), DUPLICATE_NAME_ID_DEBUG_INFO));
-          throw new DuplicateNameOrIdException("Got duplicate id.");
+          final String prevValue = map.get(key);
+          if (value.equals(prevValue)) {
+            // silently ignore equivalent entries
+            continue;
+          }
+          reportDuplicateEntry(
+              "Got multiple names associated with the same id: ",
+              key, value, key, prevValue);           
+          continue;
         }
         }
-        if (map.containsValue(nameId[0])) {
-          LOG.error(String.format(
-              "Got duplicate name:(%d, %s), existing entry: (%d, %s) \n%s",
-              key, value, map.inverse().get(value), value,
-              DUPLICATE_NAME_ID_DEBUG_INFO));
-          throw new DuplicateNameOrIdException("Got duplicate name");
+        if (map.containsValue(value)) {
+          final Integer prevKey = map.inverse().get(value);
+          reportDuplicateEntry(
+              "Got multiple ids associated with the same name: ",
+              key, value, prevKey, value);
+          continue;
         }
         }
-        map.put(Integer.valueOf(nameId[1]), nameId[0]);
+        map.put(key, value);
       }
       }
       LOG.info("Updated " + mapName + " map size:" + map.size());
       LOG.info("Updated " + mapName + " map size:" + map.size());
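
The change above replaces the hard DuplicateNameOrIdException with a warn-and-skip
policy: an identical (id, name) pair is ignored silently, and a conflicting id or name
keeps the first entry seen. A stripped-down sketch of that policy over a Guava BiMap
(method and variable names are illustrative; Guava must be on the classpath):

    import com.google.common.collect.BiMap;
    import com.google.common.collect.HashBiMap;

    public class DuplicateSkippingMap {
      /** Keep the first (id, name) binding; warn about and skip conflicting ones. */
      static void put(BiMap<Integer, String> map, int id, String name) {
        if (map.containsKey(id)) {
          if (name.equals(map.get(id))) {
            return;                      // identical entry: ignore silently
          }
          System.err.println("duplicate id " + id + ", keeping " + map.get(id));
          return;                        // same id, different name: keep first
        }
        if (map.containsValue(name)) {
          System.err.println("duplicate name " + name + ", keeping id "
              + map.inverse().get(name));
          return;                        // same name, different id: keep first
        }
        map.put(id, name);
      }

      public static void main(String[] args) {
        BiMap<Integer, String> users = HashBiMap.create();
        put(users, 11501, "hdfs");
        put(users, 11501, "hdfs1");      // skipped: id already mapped
        put(users, 11502, "hdfs");       // skipped: name already mapped
        System.out.println(users);       // {11501=hdfs}
      }
    }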
       
       

+ 26 - 15
hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/nfs/nfs3/TestIdUserGroup.java

@@ -17,11 +17,10 @@
  */
  */
 package org.apache.hadoop.nfs.nfs3;
 package org.apache.hadoop.nfs.nfs3;
 
 
-import static org.junit.Assert.fail;
-
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import java.io.IOException;
 import java.io.IOException;
 
 
-import org.apache.hadoop.nfs.nfs3.IdUserGroup.DuplicateNameOrIdException;
 import org.junit.Test;
 import org.junit.Test;
 
 
 import com.google.common.collect.BiMap;
 import com.google.common.collect.BiMap;
@@ -33,24 +32,36 @@ public class TestIdUserGroup {
   public void testDuplicates() throws IOException {
   public void testDuplicates() throws IOException {
     String GET_ALL_USERS_CMD = "echo \"root:x:0:0:root:/root:/bin/bash\n"
     String GET_ALL_USERS_CMD = "echo \"root:x:0:0:root:/root:/bin/bash\n"
         + "hdfs:x:11501:10787:Grid Distributed File System:/home/hdfs:/bin/bash\n"
         + "hdfs:x:11501:10787:Grid Distributed File System:/home/hdfs:/bin/bash\n"
-        + "hdfs:x:11502:10788:Grid Distributed File System:/home/hdfs:/bin/bash\""
+        + "hdfs:x:11502:10788:Grid Distributed File System:/home/hdfs:/bin/bash\n"
+        + "hdfs1:x:11501:10787:Grid Distributed File System:/home/hdfs:/bin/bash\n"
+        + "hdfs2:x:11502:10787:Grid Distributed File System:/home/hdfs:/bin/bash\n"
+        + "bin:x:2:2:bin:/bin:/bin/sh\n"
+        + "bin:x:1:1:bin:/bin:/sbin/nologin\n"
+        + "daemon:x:1:1:daemon:/usr/sbin:/bin/sh\n"
+        + "daemon:x:2:2:daemon:/sbin:/sbin/nologin\""
         + " | cut -d: -f1,3";
         + " | cut -d: -f1,3";
     String GET_ALL_GROUPS_CMD = "echo \"hdfs:*:11501:hrt_hdfs\n"
     String GET_ALL_GROUPS_CMD = "echo \"hdfs:*:11501:hrt_hdfs\n"
-        + "mapred:x:497\n" + "mapred2:x:497\"" + " | cut -d: -f1,3";
+        + "mapred:x:497\n"
+        + "mapred2:x:497\n"
+        + "mapred:x:498\n" 
+        + "mapred3:x:498\"" 
+        + " | cut -d: -f1,3";
     // Maps for id to name map
     // Maps for id to name map
     BiMap<Integer, String> uMap = HashBiMap.create();
     BiMap<Integer, String> uMap = HashBiMap.create();
     BiMap<Integer, String> gMap = HashBiMap.create();
     BiMap<Integer, String> gMap = HashBiMap.create();
 
 
-    try {
-      IdUserGroup.updateMapInternal(uMap, "user", GET_ALL_USERS_CMD, ":");
-      fail("didn't detect the duplicate name");
-    } catch (DuplicateNameOrIdException e) {
-    }
+    IdUserGroup.updateMapInternal(uMap, "user", GET_ALL_USERS_CMD, ":");
+    assertTrue(uMap.size() == 5);
+    assertEquals(uMap.get(0), "root");
+    assertEquals(uMap.get(11501), "hdfs");
+    assertEquals(uMap.get(11502), "hdfs2");
+    assertEquals(uMap.get(2), "bin");
+    assertEquals(uMap.get(1), "daemon");
     
     
-    try {
-      IdUserGroup.updateMapInternal(gMap, "group", GET_ALL_GROUPS_CMD, ":");
-      fail("didn't detect the duplicate id");
-    } catch (DuplicateNameOrIdException e) {
-    }
+    IdUserGroup.updateMapInternal(gMap, "group", GET_ALL_GROUPS_CMD, ":");
+    assertTrue(gMap.size() == 3);
+    assertEquals(gMap.get(11501), "hdfs");
+    assertEquals(gMap.get(497), "mapred");
+    assertEquals(gMap.get(498), "mapred3");    
   }
   }
 }
 }

+ 24 - 8
hadoop-common-project/hadoop-nfs/src/test/java/org/apache/hadoop/oncrpc/TestFrameDecoder.java

@@ -23,6 +23,7 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertTrue;
 
 
 import java.nio.ByteBuffer;
 import java.nio.ByteBuffer;
+import java.util.Random;
 
 
 import org.apache.hadoop.oncrpc.RpcUtil.RpcFrameDecoder;
 import org.apache.hadoop.oncrpc.RpcUtil.RpcFrameDecoder;
 import org.apache.hadoop.oncrpc.security.CredentialsNone;
 import org.apache.hadoop.oncrpc.security.CredentialsNone;
@@ -31,17 +32,17 @@ import org.jboss.netty.buffer.ByteBufferBackedChannelBuffer;
 import org.jboss.netty.buffer.ChannelBuffer;
 import org.jboss.netty.buffer.ChannelBuffer;
 import org.jboss.netty.buffer.ChannelBuffers;
 import org.jboss.netty.buffer.ChannelBuffers;
 import org.jboss.netty.channel.Channel;
 import org.jboss.netty.channel.Channel;
+import org.jboss.netty.channel.ChannelException;
 import org.jboss.netty.channel.ChannelHandlerContext;
 import org.jboss.netty.channel.ChannelHandlerContext;
 import org.junit.Test;
 import org.junit.Test;
 import org.mockito.Mockito;
 import org.mockito.Mockito;
 
 
 public class TestFrameDecoder {
 public class TestFrameDecoder {
 
 
-  private static int port = 12345; // some random server port
   private static int resultSize;
   private static int resultSize;
 
 
-  static void testRequest(XDR request) {
-    SimpleTcpClient tcpClient = new SimpleTcpClient("localhost", port, request,
+  static void testRequest(XDR request, int serverPort) {
+    SimpleTcpClient tcpClient = new SimpleTcpClient("localhost", serverPort, request,
         true);
         true);
     tcpClient.run();
     tcpClient.run();
   }
   }
@@ -148,10 +149,25 @@ public class TestFrameDecoder {
   @Test
   @Test
   public void testFrames() {
   public void testFrames() {
 
 
-    RpcProgram program = new TestFrameDecoder.TestRpcProgram("TestRpcProgram",
-        "localhost", port, 100000, 1, 2);
-    SimpleTcpServer tcpServer = new SimpleTcpServer(port, program, 1);
-    tcpServer.run();
+    Random rand = new Random();
+    int serverPort = 30000 + rand.nextInt(10000);
+    int retries = 10;    // A few retries in case initial choice is in use.
+
+    while (true) {
+      try {
+        RpcProgram program = new TestFrameDecoder.TestRpcProgram("TestRpcProgram",
+            "localhost", serverPort, 100000, 1, 2);
+        SimpleTcpServer tcpServer = new SimpleTcpServer(serverPort, program, 1);
+        tcpServer.run();
+        break;          // Successfully bound a port, break out.
+      } catch (ChannelException ce) {
+        if (retries-- > 0) {
+          serverPort += rand.nextInt(20); // Port in use? Try another.
+        } else {
+          throw ce;     // Out of retries.
+        }
+      }
+    }
 
 
     XDR xdrOut = createGetportMount();
     XDR xdrOut = createGetportMount();
     int headerSize = xdrOut.size();
     int headerSize = xdrOut.size();
@@ -161,7 +177,7 @@ public class TestFrameDecoder {
     int requestSize = xdrOut.size() - headerSize;
     int requestSize = xdrOut.size() - headerSize;
 
 
     // Send the request to the server
     // Send the request to the server
-    testRequest(xdrOut);
+    testRequest(xdrOut, serverPort);
 
 
     // Verify the server got the request with right size
     // Verify the server got the request with right size
     assertEquals(requestSize, resultSize);
     assertEquals(requestSize, resultSize);
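
The pattern above, picking a random high port and retrying when the bind fails, is
shown here in isolation with a plain java.net.ServerSocket standing in for the
Netty-based SimpleTcpServer, so the sketch runs without the test's dependencies:

    import java.io.IOException;
    import java.net.ServerSocket;
    import java.util.Random;

    public class RandomPortBind {
      public static void main(String[] args) throws IOException {
        Random rand = new Random();
        int port = 30000 + rand.nextInt(10000);  // start in an unprivileged range
        int retries = 10;
        ServerSocket server;
        while (true) {
          try {
            server = new ServerSocket(port);     // bind, analogous to tcpServer.run()
            break;                               // success
          } catch (IOException e) {
            if (retries-- > 0) {
              port += rand.nextInt(20) + 1;      // nudge to a nearby port and retry
            } else {
              throw e;                           // out of retries
            }
          }
        }
        System.out.println("bound to " + server.getLocalPort());
        server.close();
      }
    }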

+ 26 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java

@@ -26,6 +26,7 @@ import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeUnit;
 
 
+import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
@@ -163,8 +164,9 @@ class DFSClientCache {
     return new CacheLoader<String, DFSClient>() {
     return new CacheLoader<String, DFSClient>() {
       @Override
       @Override
       public DFSClient load(String userName) throws Exception {
       public DFSClient load(String userName) throws Exception {
-        UserGroupInformation ugi = UserGroupInformation
-            .createRemoteUser(userName);
+        UserGroupInformation ugi = getUserGroupInformation(
+                userName,
+                UserGroupInformation.getCurrentUser());
 
 
         // Guava requires CacheLoader never returns null.
         // Guava requires CacheLoader never returns null.
         return ugi.doAs(new PrivilegedExceptionAction<DFSClient>() {
         return ugi.doAs(new PrivilegedExceptionAction<DFSClient>() {
@@ -177,6 +179,28 @@ class DFSClientCache {
     };
     };
   }
   }
 
 
+  /**
+   * This method uses the currentUser, and real user to create a proxy
+   * @param effectiveUser The user who is being proxied by the real user
+   * @param realUser The actual user who does the command
+   * @return Proxy UserGroupInformation
+   * @throws IOException If proxying fails
+   */
+  UserGroupInformation getUserGroupInformation(
+          String effectiveUser,
+          UserGroupInformation realUser)
+          throws IOException {
+    Preconditions.checkNotNull(effectiveUser);
+    Preconditions.checkNotNull(realUser);
+    UserGroupInformation ugi =
+            UserGroupInformation.createProxyUser(effectiveUser, realUser);
+    if (LOG.isDebugEnabled()){
+      LOG.debug(String.format("Created ugi:" +
+              " %s for username: %s", ugi, effectiveUser));
+    }
+    return ugi;
+  }
+
   private RemovalListener<String, DFSClient> clientRemovalListener() {
   private RemovalListener<String, DFSClient> clientRemovalListener() {
     return new RemovalListener<String, DFSClient>() {
     return new RemovalListener<String, DFSClient>() {
       @Override
       @Override

+ 7 - 5
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java

@@ -479,9 +479,9 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     } 
     } 
 
 
     try {
     try {
-      // Use superUserClient to get file attr since we don't know whether the
-      // NFS client user has access permission to the file
-      attrs = writeManager.getFileAttr(superUserClient, handle, iug);
+      // HDFS-5804 removed supserUserClient access
+      attrs = writeManager.getFileAttr(dfsClient, handle, iug);
+
       if (attrs == null) {
       if (attrs == null) {
         LOG.error("Can't get path for fileId:" + handle.getFileId());
         LOG.error("Can't get path for fileId:" + handle.getFileId());
         return new ACCESS3Response(Nfs3Status.NFS3ERR_STALE);
         return new ACCESS3Response(Nfs3Status.NFS3ERR_STALE);
@@ -603,8 +603,10 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
       // Only do access check.
       // Only do access check.
       try {
       try {
         // Don't read from cache. Client may not have read permission.
         // Don't read from cache. Client may not have read permission.
-        attrs = Nfs3Utils.getFileAttr(superUserClient,
-            Nfs3Utils.getFileIdPath(handle), iug);
+        attrs = Nfs3Utils.getFileAttr(
+                  dfsClient,
+                  Nfs3Utils.getFileIdPath(handle),
+                  iug);
       } catch (IOException e) {
       } catch (IOException e) {
         if (LOG.isDebugEnabled()) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Get error accessing file, fileId:" + handle.getFileId());
           LOG.debug("Get error accessing file, fileId:" + handle.getFileId());

+ 13 - 1
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java

@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetAddress;
+import java.util.Arrays;
 import java.util.List;
 import java.util.List;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
@@ -40,6 +41,9 @@ import org.apache.hadoop.nfs.nfs3.response.READDIRPLUS3Response;
 import org.apache.hadoop.nfs.nfs3.response.READDIRPLUS3Response.EntryPlus3;
 import org.apache.hadoop.nfs.nfs3.response.READDIRPLUS3Response.EntryPlus3;
 import org.apache.hadoop.oncrpc.XDR;
 import org.apache.hadoop.oncrpc.XDR;
 import org.apache.hadoop.oncrpc.security.SecurityHandler;
 import org.apache.hadoop.oncrpc.security.SecurityHandler;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authorize.ProxyUsers;
+import org.apache.hadoop.util.StringUtils;
 import org.junit.AfterClass;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.BeforeClass;
@@ -58,9 +62,17 @@ public class TestReaddir {
   static RpcProgramNfs3 nfsd;
   static RpcProgramNfs3 nfsd;
   static String testdir = "/tmp";
   static String testdir = "/tmp";
   static SecurityHandler securityHandler;
   static SecurityHandler securityHandler;
-  
+
   @BeforeClass
   @BeforeClass
   public static void setup() throws Exception {
   public static void setup() throws Exception {
+    String currentUser = System.getProperty("user.name");
+    config.set(
+            ProxyUsers.getProxySuperuserGroupConfKey(currentUser),
+            "*");
+    config.set(
+            ProxyUsers.getProxySuperuserIpConfKey(currentUser),
+            "*");
+    ProxyUsers.refreshSuperUserGroupsConfiguration(config);
     cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
     cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
     cluster.waitActive();
     cluster.waitActive();
     hdfs = cluster.getFileSystem();
     hdfs = cluster.getFileSystem();
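
The two ProxyUsers helper keys set in the test setup above expand to the standard
hadoop.proxyuser.<user>.groups and hadoop.proxyuser.<user>.hosts properties (stated
here as background; the expansion is not shown in the diff). The "*" value opens
proxying to every group and host, which is appropriate only for tests. The same
configuration outside a test class, as a sketch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.authorize.ProxyUsers;

    public class ProxyUserConfSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        String gateway = System.getProperty("user.name");
        // Allow the gateway user to impersonate any group from any host.
        conf.set(ProxyUsers.getProxySuperuserGroupConfKey(gateway), "*");
        conf.set(ProxyUsers.getProxySuperuserIpConfKey(gateway), "*");
        ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
      }
    }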

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java

@@ -20,12 +20,15 @@ package org.apache.hadoop.hdfs.nfs.nfs3;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertThat;
+import static org.hamcrest.core.Is.is;
 
 
 import java.io.IOException;
 import java.io.IOException;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.junit.Test;
 import org.junit.Test;
 
 
 public class TestDFSClientCache {
 public class TestDFSClientCache {
@@ -49,6 +52,28 @@ public class TestDFSClientCache {
     assertEquals(MAX_CACHE_SIZE - 1, cache.clientCache.size());
     assertEquals(MAX_CACHE_SIZE - 1, cache.clientCache.size());
   }
   }
 
 
+  @Test
+  public void testGetUserGroupInformation() throws IOException {
+    String userName = "user1";
+    String currentUser = "currentUser";
+
+    UserGroupInformation currentUserUgi = UserGroupInformation
+            .createUserForTesting(currentUser, new String[0]);
+    currentUserUgi.setAuthenticationMethod(
+            UserGroupInformation.AuthenticationMethod.KERBEROS);
+    Configuration conf = new Configuration();
+    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
+    DFSClientCache cache = new DFSClientCache(conf);
+    UserGroupInformation ugiResult
+            = cache.getUserGroupInformation(userName, currentUserUgi);
+
+    assertThat(ugiResult.getUserName(), is(userName));
+    assertThat(ugiResult.getRealUser(), is(currentUserUgi));
+    assertThat(
+            ugiResult.getAuthenticationMethod(),
+            is(UserGroupInformation.AuthenticationMethod.PROXY));
+  }
+
   private static boolean isDfsClientClose(DFSClient c) {
   private static boolean isDfsClientClose(DFSClient c) {
     try {
     try {
       c.exists("");
       c.exists("");

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java

@@ -50,6 +50,7 @@ import org.apache.hadoop.nfs.nfs3.response.CREATE3Response;
 import org.apache.hadoop.nfs.nfs3.response.READ3Response;
 import org.apache.hadoop.nfs.nfs3.response.READ3Response;
 import org.apache.hadoop.oncrpc.XDR;
 import org.apache.hadoop.oncrpc.XDR;
 import org.apache.hadoop.oncrpc.security.SecurityHandler;
 import org.apache.hadoop.oncrpc.security.SecurityHandler;
+import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.jboss.netty.channel.Channel;
 import org.jboss.netty.channel.Channel;
 import org.junit.Assert;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.Test;
@@ -285,6 +286,14 @@ public class TestWrites {
     SecurityHandler securityHandler = Mockito.mock(SecurityHandler.class);
     SecurityHandler securityHandler = Mockito.mock(SecurityHandler.class);
     Mockito.when(securityHandler.getUser()).thenReturn(
     Mockito.when(securityHandler.getUser()).thenReturn(
         System.getProperty("user.name"));
         System.getProperty("user.name"));
+    String currentUser = System.getProperty("user.name");
+    config.set(
+            ProxyUsers.getProxySuperuserGroupConfKey(currentUser),
+            "*");
+    config.set(
+            ProxyUsers.getProxySuperuserIpConfKey(currentUser),
+            "*");
+    ProxyUsers.refreshSuperUserGroupsConfiguration(config);
 
 
     try {
     try {
       cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
       cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();

+ 50 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -297,8 +297,25 @@ Release 2.4.0 - UNRELEASED
     HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and 
     HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and 
     the corresponding byte value. (jing9)
     the corresponding byte value. (jing9)
 
 
+    HDFS-5153. Datanode should send block reports for each storage in a
+    separate message. (Arpit Agarwal)
+
+    HDFS-5804. HDFS NFS Gateway fails to mount and proxy when using Kerberos.
+    (Abin Shahab via jing9)
+
+    HDFS-5859. DataNode#checkBlockToken should check block tokens even if
+    security is not enabled. (cmccabe)
+
+    HDFS-5746.  Add ShortCircuitSharedMemorySegment (cmccabe)
+
+    HDFS-4911.  Reduce PeerCache timeout to be commensurate with
+    dfs.datanode.socket.reuse.keepalive (cmccabe)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
+    HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
+    (todd)
+
   BUG FIXES
   BUG FIXES
 
 
     HDFS-5492. Port HDFS-2069 (Incorrect default trash interval in the
     HDFS-5492. Port HDFS-2069 (Incorrect default trash interval in the
@@ -307,6 +324,30 @@ Release 2.4.0 - UNRELEASED
     HDFS-5843. DFSClient.getFileChecksum() throws IOException if checksum is 
     HDFS-5843. DFSClient.getFileChecksum() throws IOException if checksum is 
     disabled. (Laurent Goujon via jing9)
     disabled. (Laurent Goujon via jing9)
 
 
+    HDFS-5856. DataNode.checkDiskError might throw NPE.
+    (Josh Elser via suresh)
+
+    HDFS-5828. BlockPlacementPolicyWithNodeGroup can place multiple replicas on
+    the same node group when dfs.namenode.avoid.write.stale.datanode is true. 
+    (Buddy via junping_du)
+
+    HDFS-5767. NFS implementation assumes userName userId mapping to be unique,
+    which is not true sometimes (Yongjun Zhang via brandonli)
+
+    HDFS-5791. TestHttpsFileSystem should use a random port to avoid binding
+    error during testing (Haohui Mai via brandonli)
+
+    HDFS-5709. Improve NameNode upgrade with existing reserved paths and path
+    components. (Andrew Wang via atm)
+
+    HDFS-5881. Fix skip() of the short-circuit local reader(legacy). (kihwal)
+
+    HDFS-5895. HDFS cacheadmin -listPools has exit_code of 1 when the command
+    returns 0 result. (Tassapol Athiapinya via cnauroth)
+
+    HDFS-5807. TestBalancerWithNodeGroup.testBalancerWithNodeGroup fails
+    intermittently. (Chen He via kihwal)
+
 Release 2.3.0 - UNRELEASED
 Release 2.3.0 - UNRELEASED
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES
@@ -836,6 +877,15 @@ Release 2.3.0 - UNRELEASED
     HDFS-5842. Cannot create hftp filesystem when using a proxy user ugi and a doAs 
     HDFS-5842. Cannot create hftp filesystem when using a proxy user ugi and a doAs 
     on a secure cluster. (jing9)
     on a secure cluster. (jing9)
 
 
+    HDFS-5399. Revisit SafeModeException and corresponding retry policies.
+    (Jing Zhao via todd)
+
+    HDFS-5876. SecureDataNodeStarter does not pick up configuration in 
+    hdfs-site.xml. (Haohui Mai via jing9)
+
+    HDFS-5873. dfs.http.policy should have higher precedence over dfs.https.enable.
+    (Haohui Mai via jing9)
+
   BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
   BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
 
 
     HDFS-4985. Add storage type to the protocol and expose it in block report
     HDFS-4985. Add storage type to the protocol and expose it in block report

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java

@@ -629,7 +629,7 @@ class BlockReaderLocalLegacy implements BlockReader {
         skipBuf = new byte[bytesPerChecksum];
         skipBuf = new byte[bytesPerChecksum];
       }
       }
       int ret = read(skipBuf, 0, (int)(n - remaining));
       int ret = read(skipBuf, 0, (int)(n - remaining));
-      return ret;
+      return (remaining + ret);
     }
     }
   
   
     // optimize for big gap: discard the current buffer, skip to
     // optimize for big gap: discard the current buffer, skip to
@@ -660,9 +660,9 @@ class BlockReaderLocalLegacy implements BlockReader {
     int ret = read(skipBuf, 0, myOffsetFromChunkBoundary);
     int ret = read(skipBuf, 0, myOffsetFromChunkBoundary);
 
 
     if (ret == -1) {  // EOS
     if (ret == -1) {  // EOS
-      return toskip;
+      return (toskip + remaining);
     } else {
     } else {
-      return (toskip + ret);
+      return (toskip + remaining + ret);
     }
     }
   }
   }
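
The skip() fix above adds the `remaining` buffered bytes that are discarded before the
extra read; previously only the bytes obtained from read() were reported, so callers
such as seek() saw fewer bytes skipped than were actually consumed. A tiny arithmetic
sketch of the accounting (the values are illustrative, not taken from the patch):

    public class SkipAccounting {
      public static void main(String[] args) {
        long n = 1000;         // bytes the caller asked to skip
        long remaining = 200;  // bytes already buffered and simply discarded
        int ret = 800;         // bytes read (and thrown away) to cover the rest
        System.out.println("requested:  " + n);
        System.out.println("old return: " + ret);               // 800, too small
        System.out.println("new return: " + (remaining + ret)); // 1000, matches n
      }
    }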
 
 

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -36,6 +36,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY;
@@ -258,6 +260,7 @@ public class DFSClient implements java.io.Closeable {
   public static class Conf {
   public static class Conf {
     final int hdfsTimeout;    // timeout value for a DFS operation.
     final int hdfsTimeout;    // timeout value for a DFS operation.
     final int maxFailoverAttempts;
     final int maxFailoverAttempts;
+    final int maxRetryAttempts;
     final int failoverSleepBaseMillis;
     final int failoverSleepBaseMillis;
     final int failoverSleepMaxMillis;
     final int failoverSleepMaxMillis;
     final int maxBlockAcquireFailures;
     final int maxBlockAcquireFailures;
@@ -303,6 +306,9 @@ public class DFSClient implements java.io.Closeable {
       maxFailoverAttempts = conf.getInt(
       maxFailoverAttempts = conf.getInt(
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
+      maxRetryAttempts = conf.getInt(
+          DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY,
+          DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
       failoverSleepBaseMillis = conf.getInt(
       failoverSleepBaseMillis = conf.getInt(
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);

+ 8 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -82,9 +82,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int     DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT = 0;
   public static final int     DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT = 0;
   public static final String  DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY = "dfs.client.failover.connection.retries.on.timeouts";
   public static final String  DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY = "dfs.client.failover.connection.retries.on.timeouts";
   public static final int     DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 0;
   public static final int     DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 0;
+  public static final String  DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY = "dfs.client.retry.max.attempts";
+  public static final int     DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT = 10;
   
   
   public static final String  DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY = "dfs.client.socketcache.expiryMsec";
   public static final String  DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY = "dfs.client.socketcache.expiryMsec";
-  public static final long    DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT = 2 * 60 * 1000;
+  public static final long    DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT = 3000;
   public static final String  DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL = "dfs.client.write.exclude.nodes.cache.expiry.interval.millis";
   public static final String  DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL = "dfs.client.write.exclude.nodes.cache.expiry.interval.millis";
   public static final long    DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT = 10 * 60 * 1000; // 10 minutes, in ms
   public static final long    DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT = 10 * 60 * 1000; // 10 minutes, in ms
   public static final String  DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address";
   public static final String  DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address";
@@ -213,7 +215,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_DATANODE_SYNCONCLOSE_KEY = "dfs.datanode.synconclose";
   public static final String  DFS_DATANODE_SYNCONCLOSE_KEY = "dfs.datanode.synconclose";
   public static final boolean DFS_DATANODE_SYNCONCLOSE_DEFAULT = false;
   public static final boolean DFS_DATANODE_SYNCONCLOSE_DEFAULT = false;
   public static final String  DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY = "dfs.datanode.socket.reuse.keepalive";
   public static final String  DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY = "dfs.datanode.socket.reuse.keepalive";
-  public static final int     DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT = 1000;
+  public static final int     DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT = 4000;
 
 
   public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check";
   public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check";
   public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true;
   public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true;
@@ -399,6 +401,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long    DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT = 60 * 60 * 1000;
   public static final long    DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT = 60 * 60 * 1000;
   public static final String  DFS_BLOCKREPORT_INITIAL_DELAY_KEY = "dfs.blockreport.initialDelay";
   public static final String  DFS_BLOCKREPORT_INITIAL_DELAY_KEY = "dfs.blockreport.initialDelay";
   public static final int     DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT = 0;
   public static final int     DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT = 0;
+  public static final String  DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY = "dfs.blockreport.split.threshold";
+  public static final long    DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT = 1000 * 1000;
   public static final String  DFS_CACHEREPORT_INTERVAL_MSEC_KEY = "dfs.cachereport.intervalMsec";
   public static final String  DFS_CACHEREPORT_INTERVAL_MSEC_KEY = "dfs.cachereport.intervalMsec";
   public static final long    DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT = 10 * 1000;
   public static final long    DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT = 10 * 1000;
   public static final String  DFS_BLOCK_INVALIDATE_LIMIT_KEY = "dfs.block.invalidate.limit";
   public static final String  DFS_BLOCK_INVALIDATE_LIMIT_KEY = "dfs.block.invalidate.limit";
@@ -572,6 +576,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT = "10000,6,60000,10"; //t1,n1,t2,n2,...
   public static final String  DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT = "10000,6,60000,10"; //t1,n1,t2,n2,...
   public static final String  DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY = "dfs.http.client.failover.max.attempts";
   public static final String  DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY = "dfs.http.client.failover.max.attempts";
   public static final int     DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT = 15;
   public static final int     DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT = 15;
+  public static final String  DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY = "dfs.http.client.retry.max.attempts";
+  public static final int     DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT = 10;
   public static final String  DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY = "dfs.http.client.failover.sleep.base.millis";
   public static final String  DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY = "dfs.http.client.failover.sleep.base.millis";
   public static final int     DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT = 500;
   public static final int     DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT = 500;
   public static final String  DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY = "dfs.http.client.failover.sleep.max.millis";
   public static final String  DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY = "dfs.http.client.failover.sleep.max.millis";
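
Two of the defaults above move together: the client socket-cache expiry drops to
3000 ms while the datanode keepalive rises to 4000 ms, so cached peers now expire on
the client before the datanode side may close them (the point of HDFS-4911). A sketch
of setting the new and changed keys explicitly, using only key names visible in this
diff:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class DfsClientTuning {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Cap retries of idempotent client operations (key added in this merge).
        conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY, 10);
        // Keep the client cache expiry below the datanode keepalive so the client
        // never reuses a socket the datanode may already have closed.
        conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, 3000);
        conf.setInt(DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 4000);
        System.out.println(conf.get(DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY));
      }
    }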

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java

@@ -1345,6 +1345,14 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
           pos += blockReader.skip(diff);
           pos += blockReader.skip(diff);
           if (pos == targetPos) {
           if (pos == targetPos) {
             done = true;
             done = true;
+          } else {
+            // The range was already checked. If the block reader returns
+            // something unexpected instead of throwing an exception, it is
+            // most likely a bug. 
+            String errMsg = "BlockReader failed to seek to " + 
+                targetPos + ". Instead, it seeked to " + pos + ".";
+            DFSClient.LOG.warn(errMsg);
+            throw new IOException(errMsg);
           }
           }
         } catch (IOException e) {//make following read to retry
         } catch (IOException e) {//make following read to retry
           if(DFSClient.LOG.isDebugEnabled()) {
           if(DFSClient.LOG.isDebugEnabled()) {

+ 80 - 18
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

@@ -261,6 +261,47 @@ public class DFSUtil {
     return true;
     return true;
   }
   }
 
 
+  /**
+   * Checks if a string is a valid path component. For instance, components
+   * cannot contain a ":" or "/", and cannot be equal to a reserved component
+   * like ".snapshot".
+   * <p>
+   * The primary use of this method is for validating paths when loading the
+   * FSImage. During normal NN operation, paths are sometimes allowed to
+   * contain reserved components.
+   * 
+   * @return If component is valid
+   */
+  public static boolean isValidNameForComponent(String component) {
+    if (component.equals(".") ||
+        component.equals("..") ||
+        component.indexOf(":") >= 0 ||
+        component.indexOf("/") >= 0) {
+      return false;
+    }
+    return !isReservedPathComponent(component);
+  }
+
+
+  /**
+   * Returns if the component is reserved.
+   * 
+   * <p>
+   * Note that some components are only reserved under certain directories, e.g.
+   * "/.reserved" is reserved, while "/hadoop/.reserved" is not.
+   * 
+   * @param component
+   * @return if the component is reserved
+   */
+  public static boolean isReservedPathComponent(String component) {
+    for (String reserved : HdfsConstants.RESERVED_PATH_COMPONENTS) {
+      if (component.equals(reserved)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /**
   /**
    * Converts a byte array to a string using UTF8 encoding.
    * Converts a byte array to a string using UTF8 encoding.
    */
    */
@@ -312,7 +353,25 @@ public class DFSUtil {
     }
     }
     return result.toString();
     return result.toString();
   }
   }
-  
+
+  /**
+   * Converts a list of path components into a path using Path.SEPARATOR.
+   * 
+   * @param components Path components
+   * @return Combined path as a UTF-8 string
+   */
+  public static String strings2PathString(String[] components) {
+    if (components.length == 0) {
+      return "";
+    }
+    if (components.length == 1) {
+      if (components[0] == null || components[0].isEmpty()) {
+        return Path.SEPARATOR;
+      }
+    }
+    return Joiner.on(Path.SEPARATOR).join(components);
+  }
+
   /**
   /**
    * Given a list of path components returns a byte array
    * Given a list of path components returns a byte array
    */
    */
@@ -1508,31 +1567,34 @@ public class DFSUtil {
    * configuration settings.
    * configuration settings.
    */
    */
   public static HttpConfig.Policy getHttpPolicy(Configuration conf) {
   public static HttpConfig.Policy getHttpPolicy(Configuration conf) {
-    String httpPolicy = conf.get(DFSConfigKeys.DFS_HTTP_POLICY_KEY,
-        DFSConfigKeys.DFS_HTTP_POLICY_DEFAULT);
-
-    HttpConfig.Policy policy = HttpConfig.Policy.fromString(httpPolicy);
-
-    if (policy == HttpConfig.Policy.HTTP_ONLY) {
-      boolean httpsEnabled = conf.getBoolean(
-          DFSConfigKeys.DFS_HTTPS_ENABLE_KEY,
+    String policyStr = conf.get(DFSConfigKeys.DFS_HTTP_POLICY_KEY);
+    if (policyStr == null) {
+      boolean https = conf.getBoolean(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY,
           DFSConfigKeys.DFS_HTTPS_ENABLE_DEFAULT);
           DFSConfigKeys.DFS_HTTPS_ENABLE_DEFAULT);
 
 
-      boolean hadoopSslEnabled = conf.getBoolean(
+      boolean hadoopSsl = conf.getBoolean(
           CommonConfigurationKeys.HADOOP_SSL_ENABLED_KEY,
           CommonConfigurationKeys.HADOOP_SSL_ENABLED_KEY,
           CommonConfigurationKeys.HADOOP_SSL_ENABLED_DEFAULT);
           CommonConfigurationKeys.HADOOP_SSL_ENABLED_DEFAULT);
 
 
-      if (hadoopSslEnabled) {
+      if (hadoopSsl) {
         LOG.warn(CommonConfigurationKeys.HADOOP_SSL_ENABLED_KEY
         LOG.warn(CommonConfigurationKeys.HADOOP_SSL_ENABLED_KEY
-            + " is deprecated. Please use "
-            + DFSConfigKeys.DFS_HTTPS_ENABLE_KEY + ".");
-        policy = HttpConfig.Policy.HTTPS_ONLY;
-      } else if (httpsEnabled) {
+            + " is deprecated. Please use " + DFSConfigKeys.DFS_HTTP_POLICY_KEY
+            + ".");
+      }
+      if (https) {
         LOG.warn(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY
         LOG.warn(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY
-            + " is deprecated. Please use "
-            + DFSConfigKeys.DFS_HTTPS_ENABLE_KEY + ".");
-        policy = HttpConfig.Policy.HTTP_AND_HTTPS;
+            + " is deprecated. Please use " + DFSConfigKeys.DFS_HTTP_POLICY_KEY
+            + ".");
       }
       }
+
+      return (hadoopSsl || https) ? HttpConfig.Policy.HTTP_AND_HTTPS
+          : HttpConfig.Policy.HTTP_ONLY;
+    }
+
+    HttpConfig.Policy policy = HttpConfig.Policy.fromString(policyStr);
+    if (policy == null) {
+      throw new HadoopIllegalArgumentException("Unrecognized value '"
+          + policyStr + "' for " + DFSConfigKeys.DFS_HTTP_POLICY_KEY);
     }
     }
 
 
     conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, policy.name());
     conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, policy.name());
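
After this change dfs.http.policy, when present, wins outright; the deprecated
dfs.https.enable and hadoop.ssl.enabled keys are consulted (with a deprecation
warning) only when the new key is unset. A short sketch of selecting the policy
explicitly, using only keys, values and methods visible in the diff:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.DFSUtil;
    import org.apache.hadoop.http.HttpConfig;

    public class HttpPolicySketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, "HTTPS_ONLY");
        // Ignored: the explicit policy above takes precedence.
        conf.setBoolean(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY, true);
        HttpConfig.Policy policy = DFSUtil.getHttpPolicy(conf);
        System.out.println(policy);   // HTTPS_ONLY, not HTTP_AND_HTTPS
      }
    }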

+ 11 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java

@@ -24,6 +24,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Constructor;
@@ -144,9 +146,10 @@ public class NameNodeProxies {
           .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface,
           .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface,
               nameNodeUri);
               nameNodeUri);
       Conf config = new Conf(conf);
       Conf config = new Conf(conf);
-      T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, RetryPolicies
-          .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,
-              config.maxFailoverAttempts, config.failoverSleepBaseMillis,
+      T proxy = (T) RetryProxy.create(xface, failoverProxyProvider,
+          RetryPolicies.failoverOnNetworkException(
+              RetryPolicies.TRY_ONCE_THEN_FAIL, config.maxFailoverAttempts,
+              config.maxRetryAttempts, config.failoverSleepBaseMillis,
               config.failoverSleepMaxMillis));
               config.failoverSleepMaxMillis));
       
       
       Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
       Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
@@ -192,11 +195,14 @@ public class NameNodeProxies {
       int maxFailoverAttempts = config.getInt(
       int maxFailoverAttempts = config.getInt(
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
           DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
+      int maxRetryAttempts = config.getInt(
+          DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY,
+          DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
       InvocationHandler dummyHandler = new LossyRetryInvocationHandler<T>(
       InvocationHandler dummyHandler = new LossyRetryInvocationHandler<T>(
               numResponseToDrop, failoverProxyProvider,
               numResponseToDrop, failoverProxyProvider,
               RetryPolicies.failoverOnNetworkException(
               RetryPolicies.failoverOnNetworkException(
-                  RetryPolicies.TRY_ONCE_THEN_FAIL, 
-                  Math.max(numResponseToDrop + 1, maxFailoverAttempts), delay, 
+                  RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts, 
+                  Math.max(numResponseToDrop + 1, maxRetryAttempts), delay, 
                   maxCap));
                   maxCap));
       
       
       T proxy = (T) Proxy.newProxyInstance(
       T proxy = (T) Proxy.newProxyInstance(

+ 18 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java

@@ -18,6 +18,7 @@
 
 
 package org.apache.hadoop.hdfs;
 package org.apache.hadoop.hdfs;
 
 
+import java.io.IOException;
 import java.util.Iterator;
 import java.util.Iterator;
 import java.util.List;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Map.Entry;
@@ -25,6 +26,7 @@ import java.util.Map.Entry;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.LinkedListMultimap;
 import com.google.common.collect.LinkedListMultimap;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
@@ -118,6 +120,11 @@ class PeerCache {
     return instance;
     return instance;
   }
   }
 
 
+  @VisibleForTesting
+  public static synchronized void setInstance(int c, long e) {
+    instance = new PeerCache(c, e);
+  }
+
   private boolean isDaemonStarted() {
   private boolean isDaemonStarted() {
     return (daemon == null)? false: true;
     return (daemon == null)? false: true;
   }
   }
@@ -171,8 +178,17 @@ class PeerCache {
     while (iter.hasNext()) {
     while (iter.hasNext()) {
       Value candidate = iter.next();
       iter.remove();
-      if (!candidate.getPeer().isClosed()) {
-        return candidate.getPeer();
+      long ageMs = Time.monotonicNow() - candidate.getTime();
+      Peer peer = candidate.getPeer();
+      if (ageMs >= expiryPeriod) {
+        try {
+          peer.close();
+        } catch (IOException e) {
+          LOG.warn("got IOException closing stale peer " + peer +
+                ", which is " + ageMs + " ms old");
+        }
+      } else if (!peer.isClosed()) {
+        return peer;
       }
     }
     return null;
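The PeerCache hunk above now checks a cached peer's age against expiryPeriod with a monotonic clock and closes stale peers instead of handing them back. A minimal, self-contained sketch of that eviction pattern, using a hypothetical generic cache rather than the Hadoop Peer/PeerCache classes:

    import java.io.Closeable;
    import java.io.IOException;
    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.Iterator;

    class ExpiringCache<T extends Closeable> {
      private static final class Entry<T> {
        final T value;
        final long insertTimeMs;
        Entry(T value, long insertTimeMs) {
          this.value = value;
          this.insertTimeMs = insertTimeMs;
        }
      }

      private final Deque<Entry<T>> entries = new ArrayDeque<Entry<T>>();
      private final long expiryPeriodMs;

      ExpiringCache(long expiryPeriodMs) {
        this.expiryPeriodMs = expiryPeriodMs;
      }

      // Monotonic clock, analogous to Time.monotonicNow() in the patch.
      private static long monotonicNowMs() {
        return System.nanoTime() / 1000000L;
      }

      synchronized void put(T value) {
        entries.add(new Entry<T>(value, monotonicNowMs()));
      }

      // Return a non-expired entry, closing and discarding stale ones on the way,
      // mirroring the warn-and-continue behavior of the hunk above.
      synchronized T get() {
        Iterator<Entry<T>> iter = entries.iterator();
        while (iter.hasNext()) {
          Entry<T> candidate = iter.next();
          iter.remove();
          long ageMs = monotonicNowMs() - candidate.insertTimeMs;
          if (ageMs >= expiryPeriodMs) {
            try {
              candidate.value.close();   // stale: close instead of reusing
            } catch (IOException e) {
              // best effort; a failed close is only worth a warning
            }
          } else {
            return candidate.value;
          }
        }
        return null;
      }
    }

Measuring age with a monotonic clock keeps the expiry decision immune to wall-clock adjustments, which is presumably why the patch uses Time.monotonicNow() rather than Time.now().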

+ 302 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/ShortCircuitSharedMemorySegment.java

@@ -0,0 +1,302 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.client;
+
+import java.io.Closeable;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.apache.hadoop.io.nativeio.NativeIO.POSIX;
+import org.apache.hadoop.util.CloseableReferenceCount;
+import org.apache.hadoop.util.Shell;
+
+import com.google.common.base.Preconditions;
+import com.google.common.primitives.Ints;
+
+import sun.misc.Unsafe;
+
+public class ShortCircuitSharedMemorySegment implements Closeable {
+  private static final Log LOG =
+    LogFactory.getLog(ShortCircuitSharedMemorySegment.class);
+
+  private static final int BYTES_PER_SLOT = 64;
+
+  private static final Unsafe unsafe;
+
+  static {
+    Unsafe theUnsafe = null;
+    try {
+      Field f = Unsafe.class.getDeclaredField("theUnsafe");
+      f.setAccessible(true);
+      theUnsafe = (Unsafe)f.get(null);
+    } catch (Throwable e) {
+      LOG.error("failed to load misc.Unsafe", e);
+    }
+    unsafe = theUnsafe;
+  }
+
+  /**
+   * A slot containing information about a replica.
+   *
+   * The format is:
+   * word 0
+   *   bit 0:32   Slot flags (see below).
+   *   bit 33:63  Anchor count.
+   * word 1:7
+   *   Reserved for future use, such as statistics.
+   *   Padding is also useful for avoiding false sharing.
+   *
+   * Little-endian versus big-endian is not relevant here since both the client
+   * and the server reside on the same computer and use the same orientation.
+   */
+  public class Slot implements Closeable {
+    /**
+     * Flag indicating that the slot is in use.
+     */
+    private static final long SLOT_IN_USE_FLAG =    1L<<63;
+
+    /**
+     * Flag indicating that the slot can be anchored.
+     */
+    private static final long ANCHORABLE_FLAG =     1L<<62;
+
+    private long slotAddress;
+
+    Slot(long slotAddress) {
+      this.slotAddress = slotAddress;
+    }
+
+    /**
+     * Make a given slot anchorable.
+     */
+    public void makeAnchorable() {
+      Preconditions.checkState(slotAddress != 0,
+          "Called makeAnchorable on a slot that was closed.");
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        if ((prev & ANCHORABLE_FLAG) != 0) {
+          return;
+        }
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev | ANCHORABLE_FLAG));
+    }
+
+    /**
+     * Make a given slot unanchorable.
+     */
+    public void makeUnanchorable() {
+      Preconditions.checkState(slotAddress != 0,
+          "Called makeUnanchorable on a slot that was closed.");
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        if ((prev & ANCHORABLE_FLAG) == 0) {
+          return;
+        }
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev & (~ANCHORABLE_FLAG)));
+    }
+
+    /**
+     * Try to add an anchor for a given slot.
+     *
+     * When a slot is anchored, we know that the block it refers to is resident
+     * in memory.
+     *
+     * @return          True if the slot is anchored.
+     */
+    public boolean addAnchor() {
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        if ((prev & 0x7fffffff) == 0x7fffffff) {
+          // Too many other threads have anchored the slot (2 billion?)
+          return false;
+        }
+        if ((prev & ANCHORABLE_FLAG) == 0) {
+          // Slot can't be anchored right now.
+          return false;
+        }
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev + 1));
+      return true;
+    }
+
+    /**
+     * Remove an anchor for a given slot.
+     */
+    public void removeAnchor() {
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        Preconditions.checkState((prev & 0x7fffffff) != 0,
+            "Tried to remove anchor for slot " + slotAddress +", which was " +
+            "not anchored.");
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev - 1));
+    }
+
+    /**
+     * @return      The index of this slot.
+     */
+    public int getIndex() {
+      Preconditions.checkState(slotAddress != 0);
+      return Ints.checkedCast(
+          (slotAddress - baseAddress) / BYTES_PER_SLOT);
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (slotAddress == 0) return;
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        Preconditions.checkState((prev & SLOT_IN_USE_FLAG) != 0,
+            "tried to close slot that wasn't open");
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, 0));
+      slotAddress = 0;
+      if (ShortCircuitSharedMemorySegment.this.refCount.unreference()) {
+        ShortCircuitSharedMemorySegment.this.free();
+      }
+    }
+  }
+
+  /**
+   * The stream that we're going to use to create this shared memory segment.
+   *
+   * Although this is a FileInputStream, we are going to assume that the
+   * underlying file descriptor is writable as well as readable.
+   * It would be more appropriate to use a RandomAccessFile here, but that class
+   * does not have any public accessor which returns a FileDescriptor, unlike
+   * FileInputStream.
+   */
+  private final FileInputStream stream;
+
+  /**
+   * Length of the shared memory segment.
+   */
+  private final int length;
+
+  /**
+   * The base address of the memory-mapped file.
+   */
+  private final long baseAddress;
+
+  /**
+   * Reference count and 'closed' status.
+   */
+  private final CloseableReferenceCount refCount = new CloseableReferenceCount();
+
+  public ShortCircuitSharedMemorySegment(FileInputStream stream)
+        throws IOException {
+    if (!NativeIO.isAvailable()) {
+      throw new UnsupportedOperationException("NativeIO is not available.");
+    }
+    if (Shell.WINDOWS) {
+      throw new UnsupportedOperationException(
+          "ShortCircuitSharedMemorySegment is not yet implemented " +
+          "for Windows.");
+    }
+    if (unsafe == null) {
+      throw new UnsupportedOperationException(
+          "can't use ShortCircuitSharedMemorySegment because we failed to " +
+          "load misc.Unsafe.");
+    }
+    this.refCount.reference();
+    this.stream = stream;
+    this.length = getEffectiveLength(stream);
+    this.baseAddress = POSIX.mmap(this.stream.getFD(), 
+      POSIX.MMAP_PROT_READ | POSIX.MMAP_PROT_WRITE, true, this.length);
+  }
+
+  /**
+   * Calculate the effective usable size of the shared memory segment.
+   * We round down to a multiple of the slot size and do some validation.
+   *
+   * @param stream The stream we're using.
+   * @return       The effective usable size of the shared memory segment.
+   */
+  private static int getEffectiveLength(FileInputStream stream)
+      throws IOException {
+    int intSize = Ints.checkedCast(stream.getChannel().size());
+    int slots = intSize / BYTES_PER_SLOT;
+    Preconditions.checkState(slots > 0, "size of shared memory segment was " +
+        intSize + ", but that is not enough to hold even one slot.");
+    return slots * BYTES_PER_SLOT;
+  }
+
+  private boolean allocateSlot(long address) {
+    long prev;
+    do {
+      prev = unsafe.getLongVolatile(null, address);
+      if ((prev & Slot.SLOT_IN_USE_FLAG) != 0) {
+        return false;
+      }
+    } while (!unsafe.compareAndSwapLong(null, address,
+                prev, prev | Slot.SLOT_IN_USE_FLAG));
+    return true;
+  }
+
+  /**
+   * Allocate a new Slot in this shared memory segment.
+   *
+   * @return        A newly allocated Slot, or null if there were no available
+   *                slots.
+   */
+  public Slot allocateNextSlot() throws IOException {
+    ShortCircuitSharedMemorySegment.this.refCount.reference();
+    Slot slot = null;
+    try {
+      final int numSlots = length / BYTES_PER_SLOT;
+      for (int i = 0; i < numSlots; i++) {
+        long address = this.baseAddress + (i * BYTES_PER_SLOT);
+        if (allocateSlot(address)) {
+          slot = new Slot(address);
+          break;
+        }
+      }
+    } finally {
+      if (slot == null) {
+        if (refCount.unreference()) {
+          free();
+        }
+      }
+    }
+    return slot;
+  }
+
+  @Override
+  public void close() throws IOException {
+    refCount.setClosed();
+    if (refCount.unreference()) {
+      free();
+    }
+  }
+
+  void free() throws IOException {
+    IOUtils.cleanup(LOG, stream);
+    POSIX.munmap(baseAddress, length);
+  }
+}
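The Slot methods above all follow the same lock-free idiom: read the 64-bit slot word, decide whether the transition is allowed, and retry the compare-and-swap until it succeeds. A self-contained sketch of that idiom using java.util.concurrent.atomic.AtomicLong in place of sun.misc.Unsafe; the flag constants mirror the ones above, but this is only an illustration, not the HDFS class:

    import java.util.concurrent.atomic.AtomicLong;

    public class SlotWordExample {
      // Same bit assignments as the Slot constants above.
      private static final long SLOT_IN_USE_FLAG = 1L << 63;
      private static final long ANCHORABLE_FLAG  = 1L << 62;

      // The slot word: flag bits in the high bits, anchor count in the low bits.
      private final AtomicLong word = new AtomicLong(SLOT_IN_USE_FLAG);

      // Set the anchorable flag, retrying the CAS until it takes effect.
      void makeAnchorable() {
        long prev;
        do {
          prev = word.get();
          if ((prev & ANCHORABLE_FLAG) != 0) {
            return;                            // already anchorable
          }
        } while (!word.compareAndSet(prev, prev | ANCHORABLE_FLAG));
      }

      // Increment the anchor count held in the low bits, if allowed.
      boolean addAnchor() {
        long prev;
        do {
          prev = word.get();
          if ((prev & ANCHORABLE_FLAG) == 0) {
            return false;                      // slot cannot be anchored right now
          }
          if ((prev & 0x7fffffff) == 0x7fffffff) {
            return false;                      // anchor count saturated
          }
        } while (!word.compareAndSet(prev, prev + 1));
        return true;
      }

      public static void main(String[] args) {
        SlotWordExample slot = new SlotWordExample();
        System.out.println(slot.addAnchor());  // false: not yet anchorable
        slot.makeAnchorable();
        System.out.println(slot.addAnchor());  // true: flag set, count goes to 1
      }
    }

The retry loop is what makes each transition safe against concurrent updates without taking a lock.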

+ 12 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java

@@ -22,6 +22,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
 
 
 /************************************
  * Some handy constants
@@ -108,7 +109,17 @@ public class HdfsConstants {
    */
   public static final int LAYOUT_VERSION = LayoutVersion
       .getCurrentLayoutVersion();
-  
+
+  /**
+   * Path components that are reserved in HDFS.
+   * <p>
+   * .reserved is only reserved under root ("/").
+   */
+  public static final String[] RESERVED_PATH_COMPONENTS = new String[] {
+    HdfsConstants.DOT_SNAPSHOT_DIR,
+    FSDirectory.DOT_RESERVED_STRING
+  };
+
   /**
    * A special path component contained in the path for a snapshot file/dir
    */

+ 21 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1616,15 +1616,19 @@ public class BlockManager {
   /**
    * The given storage is reporting all its blocks.
    * Update the (storage-->block list) and (block-->storage list) maps.
+   *
+   * @return true if all known storages of the given DN have finished reporting.
+   * @throws IOException
    */
-  public void processReport(final DatanodeID nodeID,
+  public boolean processReport(final DatanodeID nodeID,
       final DatanodeStorage storage, final String poolId,
       final BlockListAsLongs newReport) throws IOException {
     namesystem.writeLock();
     final long startTime = Time.now(); //after acquiring write lock
     final long endTime;
+    DatanodeDescriptor node;
     try {
-      final DatanodeDescriptor node = datanodeManager.getDatanode(nodeID);
+      node = datanodeManager.getDatanode(nodeID);
       if (node == null || !node.isAlive) {
         throw new IOException(
             "ProcessReport from dead or unregistered node: " + nodeID);
             "ProcessReport from dead or unregistered node: " + nodeID);
@@ -1632,13 +1636,21 @@ public class BlockManager {
 
 
       // To minimize startup time, we discard any second (or later) block reports
       // that we receive while still in startup phase.
-      final DatanodeStorageInfo storageInfo = node.updateStorage(storage);
+      DatanodeStorageInfo storageInfo = node.getStorageInfo(storage.getStorageID());
+
+      if (storageInfo == null) {
+        // We handle this for backwards compatibility.
+        storageInfo = node.updateStorage(storage);
+        LOG.warn("Unknown storageId " + storage.getStorageID() +
+                    ", updating storageMap. This indicates a buggy " +
+                    "DataNode that isn't heartbeating correctly.");
+      }
       if (namesystem.isInStartupSafeMode()
           && storageInfo.getBlockReportCount() > 0) {
           && storageInfo.getBlockReportCount() > 0) {
         blockLog.info("BLOCK* processReport: "
             + "discarded non-initial block report from " + nodeID
             + " because namenode still in startup phase");
-        return;
+        return !node.hasStaleStorages();
       }
 
 
       if (storageInfo.numBlocks() == 0) {
@@ -1655,7 +1667,7 @@ public class BlockManager {
       storageInfo.receivedBlockReport();
       if (staleBefore && !storageInfo.areBlockContentsStale()) {
         LOG.info("BLOCK* processReport: Received first block report from "
-            + node + " after starting up or becoming active. Its block "
+            + storage + " after starting up or becoming active. Its block "
             + "contents are no longer considered stale");
         rescanPostponedMisreplicatedBlocks();
       }
@@ -1670,9 +1682,10 @@ public class BlockManager {
     if (metrics != null) {
       metrics.addBlockReport((int) (endTime - startTime));
     }
-    blockLog.info("BLOCK* processReport: from "
-        + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
+    blockLog.info("BLOCK* processReport: from storage " + storage.getStorageID()
+        + " node " + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
         + ", processing time: " + (endTime - startTime) + " msecs");
+    return !node.hasStaleStorages();
   }
 
 
   /**
@@ -1827,7 +1840,7 @@ public class BlockManager {
       Collection<BlockToMarkCorrupt> toCorrupt, // add to corrupt replicas list
       Collection<StatefulBlockInfo> toUC) { // add to under-construction list
 
 
-    final DatanodeStorageInfo storageInfo = dn.updateStorage(storage);
+    final DatanodeStorageInfo storageInfo = dn.getStorageInfo(storage.getStorageID());
 
 
     // place a delimiter in the list which separates blocks 
     // that have been reported from those that have not

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java

@@ -317,7 +317,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
         // We need to additionally exclude the nodes that were added to the 
         // result list in the successful calls to choose*() above.
         for (DatanodeStorageInfo resultStorage : results) {
-          oldExcludedNodes.add(resultStorage.getDatanodeDescriptor());
+          addToExcludedNodes(resultStorage.getDatanodeDescriptor(), oldExcludedNodes);
         }
         // Set numOfReplicas, since it can get out of sync with the result list
         // if the NotEnoughReplicasException was thrown in chooseRandom().

+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java

@@ -257,6 +257,17 @@ public class DatanodeDescriptor extends DatanodeInfo {
     }
   }
 
 
+  boolean hasStaleStorages() {
+    synchronized (storageMap) {
+      for (DatanodeStorageInfo storage : storageMap.values()) {
+        if (storage.areBlockContentsStale()) {
+          return true;
+        }
+      }
+      return false;
+    }
+  }
+
   /**
    * Remove block from the list of blocks belonging to the data-node. Remove
    * data-node from the block.

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java

@@ -59,7 +59,8 @@ public final class HdfsServerConstants {
     INITIALIZESHAREDEDITS("-initializeSharedEdits"),
     RECOVER  ("-recover"),
     FORCE("-force"),
-    NONINTERACTIVE("-nonInteractive");
+    NONINTERACTIVE("-nonInteractive"),
+    RENAMERESERVED("-renameReserved");
     
     
     private final String name;
     
     

+ 94 - 71
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java

@@ -22,11 +22,9 @@ import static org.apache.hadoop.util.Time.now;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.SocketTimeoutException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 
+import com.google.common.base.Joiner;
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
@@ -437,75 +435,100 @@ class BPServiceActor implements Runnable {
 
 
   /**
    * Report the list blocks to the Namenode
+   * @return DatanodeCommands returned by the NN. May be null.
    * @throws IOException
    */
-  DatanodeCommand blockReport() throws IOException {
+  List<DatanodeCommand> blockReport() throws IOException {
     // send block report if timer has expired.
-    DatanodeCommand cmd = null;
-    long startTime = now();
-    if (startTime - lastBlockReport > dnConf.blockReportInterval) {
-
-      // Flush any block information that precedes the block report. Otherwise
-      // we have a chance that we will miss the delHint information
-      // or we will report an RBW replica after the BlockReport already reports
-      // a FINALIZED one.
-      reportReceivedDeletedBlocks();
-
-      // Send one block report per known storage.
-
-      // Create block report
-      long brCreateStartTime = now();
-      long totalBlockCount = 0;
-
-      Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
-          dn.getFSDataset().getBlockReports(bpos.getBlockPoolId());
-
-      // Send block report
-      long brSendStartTime = now();
-      StorageBlockReport[] reports =
-          new StorageBlockReport[perVolumeBlockLists.size()];
-
-      int i = 0;
-      for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
-        DatanodeStorage dnStorage = kvPair.getKey();
-        BlockListAsLongs blockList = kvPair.getValue();
-        totalBlockCount += blockList.getNumberOfBlocks();
-
-        reports[i++] =
-            new StorageBlockReport(
-              dnStorage, blockList.getBlockListAsLongs());
-      }
+    final long startTime = now();
+    if (startTime - lastBlockReport <= dnConf.blockReportInterval) {
+      return null;
+    }
 
 
-      cmd = bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), reports);
-
-      // Log the block report processing stats from Datanode perspective
-      long brSendCost = now() - brSendStartTime;
-      long brCreateCost = brSendStartTime - brCreateStartTime;
-      dn.getMetrics().addBlockReport(brSendCost);
-      LOG.info("BlockReport of " + totalBlockCount
-          + " blocks took " + brCreateCost + " msec to generate and "
-          + brSendCost + " msecs for RPC and NN processing");
-
-      // If we have sent the first block report, then wait a random
-      // time before we start the periodic block reports.
-      if (resetBlockReportTime) {
-        lastBlockReport = startTime - DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval));
-        resetBlockReportTime = false;
-      } else {
-        /* say the last block report was at 8:20:14. The current report
-         * should have started around 9:20:14 (default 1 hour interval).
-         * If current time is :
-         *   1) normal like 9:20:18, next report should be at 10:20:14
-         *   2) unexpected like 11:35:43, next report should be at 12:20:14
-         */
-        lastBlockReport += (now() - lastBlockReport) /
-        dnConf.blockReportInterval * dnConf.blockReportInterval;
+    ArrayList<DatanodeCommand> cmds = new ArrayList<DatanodeCommand>();
+
+    // Flush any block information that precedes the block report. Otherwise
+    // we have a chance that we will miss the delHint information
+    // or we will report an RBW replica after the BlockReport already reports
+    // a FINALIZED one.
+    reportReceivedDeletedBlocks();
+    lastDeletedReport = startTime;
+
+    long brCreateStartTime = now();
+    Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
+        dn.getFSDataset().getBlockReports(bpos.getBlockPoolId());
+
+    // Convert the reports to the format expected by the NN.
+    int i = 0;
+    int totalBlockCount = 0;
+    StorageBlockReport reports[] =
+        new StorageBlockReport[perVolumeBlockLists.size()];
+
+    for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
+      BlockListAsLongs blockList = kvPair.getValue();
+      reports[i++] = new StorageBlockReport(
+          kvPair.getKey(), blockList.getBlockListAsLongs());
+      totalBlockCount += blockList.getNumberOfBlocks();
+    }
+
+    // Send the reports to the NN.
+    int numReportsSent;
+    long brSendStartTime = now();
+    if (totalBlockCount < dnConf.blockReportSplitThreshold) {
+      // Below split threshold, send all reports in a single message.
+      numReportsSent = 1;
+      DatanodeCommand cmd =
+          bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), reports);
+      if (cmd != null) {
+        cmds.add(cmd);
+      }
+    } else {
+      // Send one block report per message.
+      numReportsSent = i;
+      for (StorageBlockReport report : reports) {
+        StorageBlockReport singleReport[] = { report };
+        DatanodeCommand cmd = bpNamenode.blockReport(
+            bpRegistration, bpos.getBlockPoolId(), singleReport);
+        if (cmd != null) {
+          cmds.add(cmd);
+        }
       }
-      LOG.info("sent block report, processed command:" + cmd);
     }
-    return cmd;
+
+    // Log the block report processing stats from Datanode perspective
+    long brSendCost = now() - brSendStartTime;
+    long brCreateCost = brSendStartTime - brCreateStartTime;
+    dn.getMetrics().addBlockReport(brSendCost);
+    LOG.info("Sent " + numReportsSent + " blockreports " + totalBlockCount +
+        " blocks total. Took " + brCreateCost +
+        " msec to generate and " + brSendCost +
+        " msecs for RPC and NN processing. " +
+        " Got back commands " +
+            (cmds.size() == 0 ? "none" : Joiner.on("; ").join(cmds)));
+
+    scheduleNextBlockReport(startTime);
+    return cmds.size() == 0 ? null : cmds;
+  }
+
+  private void scheduleNextBlockReport(long previousReportStartTime) {
+    // If we have sent the first set of block reports, then wait a random
+    // time before we start the periodic block reports.
+    if (resetBlockReportTime) {
+      lastBlockReport = previousReportStartTime -
+          DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval));
+      resetBlockReportTime = false;
+    } else {
+      /* say the last block report was at 8:20:14. The current report
+       * should have started around 9:20:14 (default 1 hour interval).
+       * If current time is :
+       *   1) normal like 9:20:18, next report should be at 10:20:14
+       *   2) unexpected like 11:35:43, next report should be at 12:20:14
+       */
+      lastBlockReport += (now() - lastBlockReport) /
+          dnConf.blockReportInterval * dnConf.blockReportInterval;
+    }
   }
-  
+
   DatanodeCommand cacheReport() throws IOException {
     // If caching is disabled, do not send a cache report
     if (dn.getFSDataset().getCacheCapacity() == 0) {
@@ -513,7 +536,7 @@ class BPServiceActor implements Runnable {
     }
     // send cache report if timer has expired.
     DatanodeCommand cmd = null;
-    long startTime = Time.monotonicNow();
+    final long startTime = Time.monotonicNow();
     if (startTime - lastCacheReport > dnConf.cacheReportInterval) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Sending cacheReport from service actor: " + this);
@@ -613,7 +636,7 @@ class BPServiceActor implements Runnable {
     //
     while (shouldRun()) {
       try {
-        long startTime = now();
+        final long startTime = now();
 
 
         //
         // Every so often, send heartbeat or block-report
@@ -659,10 +682,10 @@ class BPServiceActor implements Runnable {
           lastDeletedReport = startTime;
         }
 
 
-        DatanodeCommand cmd = blockReport();
-        processCommand(new DatanodeCommand[]{ cmd });
+        List<DatanodeCommand> cmds = blockReport();
+        processCommand(cmds == null ? null : cmds.toArray(new DatanodeCommand[cmds.size()]));
 
 
-        cmd = cacheReport();
+        DatanodeCommand cmd = cacheReport();
         processCommand(new DatanodeCommand[]{ cmd });
 
 
         // Now safe to start scanning the block pool.
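The comment inside scheduleNextBlockReport() above walks through two timing cases for the next block report. A small worked example of the same integer arithmetic, with hypothetical clock values expressed as milliseconds since midnight:

    public class BlockReportScheduleExample {
      // Same arithmetic as the patch: advance lastBlockReport by whole elapsed intervals.
      static long advance(long lastBlockReport, long now, long intervalMs) {
        return lastBlockReport + (now - lastBlockReport) / intervalMs * intervalMs;
      }

      static long hms(int h, int m, int s) {
        return ((h * 60L + m) * 60 + s) * 1000;
      }

      public static void main(String[] args) {
        long intervalMs = 60L * 60 * 1000;   // default 1 hour interval

        // Last report at 8:20:14, current report slightly late at 9:20:18:
        // lastBlockReport becomes 9:20:14, so the next report fires at 10:20:14.
        System.out.println(advance(hms(8, 20, 14), hms(9, 20, 18), intervalMs) == hms(9, 20, 14));

        // Last report at 8:20:14, current report very late at 11:35:43:
        // lastBlockReport becomes 11:20:14, so the next report fires at 12:20:14.
        System.out.println(advance(hms(8, 20, 14), hms(11, 35, 43), intervalMs) == hms(11, 20, 14));
      }
    }

Both checks print true, matching the 10:20:14 and 12:20:14 cases described in the comment.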

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java

@@ -23,6 +23,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
@@ -70,6 +72,7 @@ public class DNConf {
   final long readaheadLength;
   final long heartBeatInterval;
   final long blockReportInterval;
+  final long blockReportSplitThreshold;
   final long deleteReportInterval;
   final long initialBlockReportDelay;
   final long cacheReportInterval;
@@ -117,6 +120,8 @@ public class DNConf {
         DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
     this.blockReportInterval = conf.getLong(DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
         DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT);
+    this.blockReportSplitThreshold = conf.getLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY,
+                                            DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT);
     this.cacheReportInterval = conf.getLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY,
         DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT);
     
     

+ 31 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -36,6 +36,7 @@ import java.net.SocketTimeoutException;
 import java.net.URI;
 import java.net.UnknownHostException;
 import java.nio.channels.ClosedByInterruptException;
+import java.nio.channels.ClosedChannelException;
 import java.nio.channels.SocketChannel;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -51,7 +52,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 
 import javax.management.ObjectName;
 
 
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -362,13 +362,13 @@ public class DataNode extends Configured
         .setConf(conf).setACL(new AccessControlList(conf.get(DFS_ADMIN, " ")));
 
 
     HttpConfig.Policy policy = DFSUtil.getHttpPolicy(conf);
-    InetSocketAddress infoSocAddr = DataNode.getInfoAddr(conf);
-    String infoHost = infoSocAddr.getHostName();
 
 
     if (policy.isHttpEnabled()) {
       if (secureResources == null) {
+        InetSocketAddress infoSocAddr = DataNode.getInfoAddr(conf);
         int port = infoSocAddr.getPort();
-        builder.addEndpoint(URI.create("http://" + infoHost + ":" + port));
+        builder.addEndpoint(URI.create("http://"
+            + NetUtils.getHostPortString(infoSocAddr)));
         if (port == 0) {
           builder.setFindPort(true);
         }
@@ -381,7 +381,7 @@ public class DataNode extends Configured
 
 
     if (policy.isHttpsEnabled()) {
       InetSocketAddress secInfoSocAddr = NetUtils.createSocketAddr(conf.get(
-          DFS_DATANODE_HTTPS_ADDRESS_KEY, infoHost + ":" + 0));
+          DFS_DATANODE_HTTPS_ADDRESS_KEY, DFS_DATANODE_HTTPS_ADDRESS_DEFAULT));
 
 
       Configuration sslConf = DFSUtil.loadSslConfiguration(conf);
       DFSUtil.loadSslConfToHttpServerBuilder(builder, sslConf);
@@ -390,7 +390,8 @@ public class DataNode extends Configured
       if (port == 0) {
         builder.setFindPort(true);
       }
-      builder.addEndpoint(URI.create("https://" + infoHost + ":" + port));
+      builder.addEndpoint(URI.create("https://"
+          + NetUtils.getHostPortString(secInfoSocAddr)));
     }
 
 
     this.infoServer = builder.build();
@@ -1194,7 +1195,7 @@ public class DataNode extends Configured
   
   
   private void checkBlockToken(ExtendedBlock block, Token<BlockTokenIdentifier> token,
       AccessMode accessMode) throws IOException {
-    if (isBlockTokenEnabled && UserGroupInformation.isSecurityEnabled()) {
+    if (isBlockTokenEnabled) {
       BlockTokenIdentifier id = new BlockTokenIdentifier();
       ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
       DataInputStream in = new DataInputStream(buf);
@@ -1324,12 +1325,7 @@ public class DataNode extends Configured
   protected void checkDiskError(Exception e ) throws IOException {
     
     
     LOG.warn("checkDiskError: exception: ", e);
-    if (e instanceof SocketException || e instanceof SocketTimeoutException
-    	  || e instanceof ClosedByInterruptException 
-    	  || e.getMessage().startsWith("An established connection was aborted")
-    	  || e.getMessage().startsWith("Broken pipe")
-    	  || e.getMessage().startsWith("Connection reset")
-    	  || e.getMessage().contains("java.nio.channels.SocketChannel")) {
+    if (isNetworkRelatedException(e)) {
       LOG.info("Not checking disk as checkDiskError was called on a network" +
       		" related exception");	
       		" related exception");	
       return;
@@ -1342,6 +1338,28 @@ public class DataNode extends Configured
     }
   }
   
   
+  /**
+   * Check if the provided exception looks like it's from a network error
+   * @param e the exception from a checkDiskError call
+   * @return true if this exception is network related, false otherwise
+   */
+  protected boolean isNetworkRelatedException(Exception e) {
+    if (e instanceof SocketException 
+        || e instanceof SocketTimeoutException
+        || e instanceof ClosedChannelException 
+        || e instanceof ClosedByInterruptException) {
+      return true;
+    }
+    
+    String msg = e.getMessage();
+    
+    return null != msg 
+        && (msg.startsWith("An established connection was aborted")
+            || msg.startsWith("Broken pipe")
+            || msg.startsWith("Connection reset")
+            || msg.contains("java.nio.channels.SocketChannel"));
+  }
+  
   /**
    *  Check if there is a disk failure and if so, handle the error
    */
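One side effect of extracting isNetworkRelatedException() above is that the message checks become null-safe: the old inline condition called e.getMessage().startsWith(...) directly and could throw NullPointerException for exceptions without a message. A standalone sketch of the same classification rules (illustration only, not the DataNode code):

    import java.io.IOException;
    import java.net.SocketException;
    import java.net.SocketTimeoutException;
    import java.nio.channels.ClosedByInterruptException;
    import java.nio.channels.ClosedChannelException;

    public class NetworkExceptionCheckExample {
      static boolean isNetworkRelated(Exception e) {
        if (e instanceof SocketException
            || e instanceof SocketTimeoutException
            || e instanceof ClosedChannelException
            || e instanceof ClosedByInterruptException) {
          return true;
        }
        String msg = e.getMessage();
        return msg != null
            && (msg.startsWith("An established connection was aborted")
                || msg.startsWith("Broken pipe")
                || msg.startsWith("Connection reset")
                || msg.contains("java.nio.channels.SocketChannel"));
      }

      public static void main(String[] args) {
        System.out.println(isNetworkRelated(new SocketTimeoutException()));     // true
        System.out.println(isNetworkRelated(new IOException("Broken pipe")));   // true
        System.out.println(isNetworkRelated(new IOException((String) null)));   // false, and no NPE
        System.out.println(isNetworkRelated(new IOException("disk failure")));  // false
      }
    }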

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/SecureDataNodeStarter.java

@@ -25,6 +25,7 @@ import org.apache.commons.daemon.DaemonContext;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.http.HttpServer2;
@@ -62,7 +63,9 @@ public class SecureDataNodeStarter implements Daemon {
   @Override
   public void init(DaemonContext context) throws Exception {
     System.err.println("Initializing secure datanode resources");
-    Configuration conf = new Configuration();
+    // Create a new HdfsConfiguration object to ensure that the configuration in
+    // hdfs-site.xml is picked up.
+    Configuration conf = new HdfsConfiguration();
     
     
     // Stash command-line arguments for regular datanode
     args = context.getArguments();

+ 91 - 53
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
 
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormat.renameReservedPathsOnUpgrade;
 import static org.apache.hadoop.util.Time.now;
 
 
 import java.io.FilterInputStream;
@@ -292,8 +293,10 @@ public class FSEditLogLoader {
     switch (op.opCode) {
     case OP_ADD: {
       AddCloseOp addCloseOp = (AddCloseOp)op;
+      final String path =
+          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
       if (FSNamesystem.LOG.isDebugEnabled()) {
-        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
+        FSNamesystem.LOG.debug(op.opCode + ": " + path +
             " numblocks : " + addCloseOp.blocks.length +
             " numblocks : " + addCloseOp.blocks.length +
             " clientHolder " + addCloseOp.clientName +
             " clientHolder " + addCloseOp.clientName +
             " clientMachine " + addCloseOp.clientMachine);
             " clientMachine " + addCloseOp.clientMachine);
@@ -304,9 +307,9 @@ public class FSEditLogLoader {
       // 3. OP_ADD to open file for append
 
 
       // See if the file already exists (persistBlocks call)
-      final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path);
+      final INodesInPath iip = fsDir.getLastINodeInPath(path);
       final INodeFile oldFile = INodeFile.valueOf(
-          iip.getINode(0), addCloseOp.path, true);
+          iip.getINode(0), path, true);
       INodeFile newFile = oldFile;
       if (oldFile == null) { // this is OP_ADD on a new file (case 1)
         // versions > 0 support per file replication
@@ -319,10 +322,10 @@ public class FSEditLogLoader {
         inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion,
             lastInodeId);
         newFile = fsDir.unprotectedAddFile(inodeId,
-            addCloseOp.path, addCloseOp.permissions, replication,
+            path, addCloseOp.permissions, replication,
             addCloseOp.mtime, addCloseOp.atime, addCloseOp.blockSize, true,
             addCloseOp.clientName, addCloseOp.clientMachine);
-        fsNamesys.leaseManager.addLease(addCloseOp.clientName, addCloseOp.path);
+        fsNamesys.leaseManager.addLease(addCloseOp.clientName, path);
 
 
         // add the op into retry cache if necessary
         if (toAddRetryCache) {
@@ -338,11 +341,11 @@ public class FSEditLogLoader {
             FSNamesystem.LOG.debug("Reopening an already-closed file " +
                 "for append");
                 "for append");
           }
-          LocatedBlock lb = fsNamesys.prepareFileForWrite(addCloseOp.path,
+          LocatedBlock lb = fsNamesys.prepareFileForWrite(path,
               oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null,
               false, iip.getLatestSnapshotId(), false);
-          newFile = INodeFile.valueOf(fsDir.getINode(addCloseOp.path),
-              addCloseOp.path, true);
+          newFile = INodeFile.valueOf(fsDir.getINode(path),
+              path, true);
           
           
           // add the op into retry cache is necessary
           if (toAddRetryCache) {
@@ -363,16 +366,17 @@ public class FSEditLogLoader {
     }
     case OP_CLOSE: {
       AddCloseOp addCloseOp = (AddCloseOp)op;
-      
+      final String path =
+          renameReservedPathsOnUpgrade(addCloseOp.path, logVersion);
       if (FSNamesystem.LOG.isDebugEnabled()) {
-        FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path +
+        FSNamesystem.LOG.debug(op.opCode + ": " + path +
             " numblocks : " + addCloseOp.blocks.length +
             " numblocks : " + addCloseOp.blocks.length +
             " clientHolder " + addCloseOp.clientName +
             " clientHolder " + addCloseOp.clientName +
             " clientMachine " + addCloseOp.clientMachine);
             " clientMachine " + addCloseOp.clientMachine);
       }
 
 
-      final INodesInPath iip = fsDir.getLastINodeInPath(addCloseOp.path);
-      final INodeFile file = INodeFile.valueOf(iip.getINode(0), addCloseOp.path);
+      final INodesInPath iip = fsDir.getLastINodeInPath(path);
+      final INodeFile file = INodeFile.valueOf(iip.getINode(0), path);
 
 
       // Update the salient file attributes.
       file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
@@ -386,24 +390,26 @@ public class FSEditLogLoader {
         // could show up twice in a row. But after that version, this
         // should be fixed, so we should treat it as an error.
         throw new IOException(
-            "File is not under construction: " + addCloseOp.path);
+            "File is not under construction: " + path);
       }
       // One might expect that you could use removeLease(holder, path) here,
       // but OP_CLOSE doesn't serialize the holder. So, remove by path.
       if (file.isUnderConstruction()) {
-        fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path);
+        fsNamesys.leaseManager.removeLeaseWithPrefixPath(path);
         file.toCompleteFile(file.getModificationTime());
       }
       break;
     }
     case OP_UPDATE_BLOCKS: {
       UpdateBlocksOp updateOp = (UpdateBlocksOp)op;
+      final String path =
+          renameReservedPathsOnUpgrade(updateOp.path, logVersion);
       if (FSNamesystem.LOG.isDebugEnabled()) {
-        FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path +
+        FSNamesystem.LOG.debug(op.opCode + ": " + path +
             " numblocks : " + updateOp.blocks.length);
             " numblocks : " + updateOp.blocks.length);
       }
-      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(updateOp.path),
-          updateOp.path);
+      INodeFile oldFile = INodeFile.valueOf(fsDir.getINode(path),
+          path);
       // Update in-memory data structures
       updateBlocks(fsDir, updateOp, oldFile);
       
       
@@ -414,7 +420,7 @@ public class FSEditLogLoader {
     }
     case OP_ADD_BLOCK: {
       AddBlockOp addBlockOp = (AddBlockOp) op;
-      String path = addBlockOp.getPath();
+      String path = renameReservedPathsOnUpgrade(addBlockOp.getPath(), logVersion);
       if (FSNamesystem.LOG.isDebugEnabled()) {
         FSNamesystem.LOG.debug(op.opCode + ": " + path +
             " new block id : " + addBlockOp.getLastBlock().getBlockId());
             " new block id : " + addBlockOp.getLastBlock().getBlockId());
@@ -428,14 +434,20 @@ public class FSEditLogLoader {
       SetReplicationOp setReplicationOp = (SetReplicationOp)op;
       short replication = fsNamesys.getBlockManager().adjustReplication(
           setReplicationOp.replication);
-      fsDir.unprotectedSetReplication(setReplicationOp.path,
+      fsDir.unprotectedSetReplication(
+          renameReservedPathsOnUpgrade(setReplicationOp.path, logVersion),
                                       replication, null);
       break;
     }
     case OP_CONCAT_DELETE: {
       ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op;
-      fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs,
-          concatDeleteOp.timestamp);
+      String trg = renameReservedPathsOnUpgrade(concatDeleteOp.trg, logVersion);
+      String[] srcs = new String[concatDeleteOp.srcs.length];
+      for (int i=0; i<srcs.length; i++) {
+        srcs[i] =
+            renameReservedPathsOnUpgrade(concatDeleteOp.srcs[i], logVersion);
+      }
+      fsDir.unprotectedConcat(trg, srcs, concatDeleteOp.timestamp);
       
       
       if (toAddRetryCache) {
       if (toAddRetryCache) {
         fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
         fsNamesys.addCacheEntry(concatDeleteOp.rpcClientId,
@@ -445,7 +457,9 @@ public class FSEditLogLoader {
     }
     }
     case OP_RENAME_OLD: {
     case OP_RENAME_OLD: {
       RenameOldOp renameOp = (RenameOldOp)op;
       RenameOldOp renameOp = (RenameOldOp)op;
-      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
+      final String src = renameReservedPathsOnUpgrade(renameOp.src, logVersion);
+      final String dst = renameReservedPathsOnUpgrade(renameOp.dst, logVersion);
+      fsDir.unprotectedRenameTo(src, dst,
                                 renameOp.timestamp);
                                 renameOp.timestamp);
       
       
       if (toAddRetryCache) {
       if (toAddRetryCache) {
@@ -455,7 +469,9 @@ public class FSEditLogLoader {
     }
     }
     case OP_DELETE: {
     case OP_DELETE: {
       DeleteOp deleteOp = (DeleteOp)op;
       DeleteOp deleteOp = (DeleteOp)op;
-      fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp);
+      fsDir.unprotectedDelete(
+          renameReservedPathsOnUpgrade(deleteOp.path, logVersion),
+          deleteOp.timestamp);
       
       
       if (toAddRetryCache) {
       if (toAddRetryCache) {
         fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
         fsNamesys.addCacheEntry(deleteOp.rpcClientId, deleteOp.rpcCallId);
@@ -466,8 +482,9 @@ public class FSEditLogLoader {
       MkdirOp mkdirOp = (MkdirOp)op;
       MkdirOp mkdirOp = (MkdirOp)op;
       inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
       inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion,
           lastInodeId);
           lastInodeId);
-      fsDir.unprotectedMkdir(inodeId, mkdirOp.path, mkdirOp.permissions,
-                             mkdirOp.timestamp);
+      fsDir.unprotectedMkdir(inodeId,
+          renameReservedPathsOnUpgrade(mkdirOp.path, logVersion),
+          mkdirOp.permissions, mkdirOp.timestamp);
       break;
       break;
     }
     }
     case OP_SET_GENSTAMP_V1: {
     case OP_SET_GENSTAMP_V1: {
@@ -477,53 +494,56 @@ public class FSEditLogLoader {
     }
     }
     case OP_SET_PERMISSIONS: {
     case OP_SET_PERMISSIONS: {
       SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
       SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op;
-      fsDir.unprotectedSetPermission(setPermissionsOp.src,
-                                     setPermissionsOp.permissions);
+      fsDir.unprotectedSetPermission(
+          renameReservedPathsOnUpgrade(setPermissionsOp.src, logVersion),
+          setPermissionsOp.permissions);
       break;
       break;
     }
     }
     case OP_SET_OWNER: {
     case OP_SET_OWNER: {
       SetOwnerOp setOwnerOp = (SetOwnerOp)op;
       SetOwnerOp setOwnerOp = (SetOwnerOp)op;
-      fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username,
-                                setOwnerOp.groupname);
+      fsDir.unprotectedSetOwner(
+          renameReservedPathsOnUpgrade(setOwnerOp.src, logVersion),
+          setOwnerOp.username, setOwnerOp.groupname);
       break;
       break;
     }
     }
     case OP_SET_NS_QUOTA: {
     case OP_SET_NS_QUOTA: {
       SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
       SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op;
-      fsDir.unprotectedSetQuota(setNSQuotaOp.src,
-                                setNSQuotaOp.nsQuota,
-                                HdfsConstants.QUOTA_DONT_SET);
+      fsDir.unprotectedSetQuota(
+          renameReservedPathsOnUpgrade(setNSQuotaOp.src, logVersion),
+          setNSQuotaOp.nsQuota, HdfsConstants.QUOTA_DONT_SET);
       break;
       break;
     }
     }
     case OP_CLEAR_NS_QUOTA: {
     case OP_CLEAR_NS_QUOTA: {
       ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
       ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op;
-      fsDir.unprotectedSetQuota(clearNSQuotaOp.src,
-                                HdfsConstants.QUOTA_RESET,
-                                HdfsConstants.QUOTA_DONT_SET);
+      fsDir.unprotectedSetQuota(
+          renameReservedPathsOnUpgrade(clearNSQuotaOp.src, logVersion),
+          HdfsConstants.QUOTA_RESET, HdfsConstants.QUOTA_DONT_SET);
       break;
       break;
     }
     }
 
 
     case OP_SET_QUOTA:
     case OP_SET_QUOTA:
       SetQuotaOp setQuotaOp = (SetQuotaOp)op;
       SetQuotaOp setQuotaOp = (SetQuotaOp)op;
-      fsDir.unprotectedSetQuota(setQuotaOp.src,
-                                setQuotaOp.nsQuota,
-                                setQuotaOp.dsQuota);
+      fsDir.unprotectedSetQuota(
+          renameReservedPathsOnUpgrade(setQuotaOp.src, logVersion),
+          setQuotaOp.nsQuota, setQuotaOp.dsQuota);
       break;
       break;
 
 
     case OP_TIMES: {
     case OP_TIMES: {
       TimesOp timesOp = (TimesOp)op;
       TimesOp timesOp = (TimesOp)op;
 
 
-      fsDir.unprotectedSetTimes(timesOp.path,
-                                timesOp.mtime,
-                                timesOp.atime, true);
+      fsDir.unprotectedSetTimes(
+          renameReservedPathsOnUpgrade(timesOp.path, logVersion),
+          timesOp.mtime, timesOp.atime, true);
       break;
       break;
     }
     }
     case OP_SYMLINK: {
     case OP_SYMLINK: {
       SymlinkOp symlinkOp = (SymlinkOp)op;
       SymlinkOp symlinkOp = (SymlinkOp)op;
       inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
       inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion,
           lastInodeId);
           lastInodeId);
-      fsDir.unprotectedAddSymlink(inodeId, symlinkOp.path,
-                                  symlinkOp.value, symlinkOp.mtime, 
-                                  symlinkOp.atime, symlinkOp.permissionStatus);
+      fsDir.unprotectedAddSymlink(inodeId,
+          renameReservedPathsOnUpgrade(symlinkOp.path, logVersion),
+          symlinkOp.value, symlinkOp.mtime, symlinkOp.atime,
+          symlinkOp.permissionStatus);
      
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(symlinkOp.rpcClientId, symlinkOp.rpcCallId);
@@ -532,8 +552,10 @@ public class FSEditLogLoader {
    }
    case OP_RENAME: {
      RenameOp renameOp = (RenameOp)op;
-      fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
-                                renameOp.timestamp, renameOp.options);
+      fsDir.unprotectedRenameTo(
+          renameReservedPathsOnUpgrade(renameOp.src, logVersion),
+          renameReservedPathsOnUpgrade(renameOp.dst, logVersion),
+          renameOp.timestamp, renameOp.options);
      
      if (toAddRetryCache) {
        fsNamesys.addCacheEntry(renameOp.rpcClientId, renameOp.rpcCallId);
@@ -576,10 +598,12 @@ public class FSEditLogLoader {
 
      Lease lease = fsNamesys.leaseManager.getLease(
          reassignLeaseOp.leaseHolder);
-      INodeFile pendingFile = fsDir.getINode(reassignLeaseOp.path).asFile();
+      final String path =
+          renameReservedPathsOnUpgrade(reassignLeaseOp.path, logVersion);
+      INodeFile pendingFile = fsDir.getINode(path).asFile();
      Preconditions.checkState(pendingFile.isUnderConstruction());
      fsNamesys.reassignLeaseInternal(lease,
-          reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile);
+          path, reassignLeaseOp.newHolder, pendingFile);
      break;
    }
    case OP_START_LOG_SEGMENT:
@@ -589,8 +613,11 @@ public class FSEditLogLoader {
    }
    case OP_CREATE_SNAPSHOT: {
      CreateSnapshotOp createSnapshotOp = (CreateSnapshotOp) op;
+      final String snapshotRoot =
+          renameReservedPathsOnUpgrade(createSnapshotOp.snapshotRoot,
+              logVersion);
      String path = fsNamesys.getSnapshotManager().createSnapshot(
-          createSnapshotOp.snapshotRoot, createSnapshotOp.snapshotName);
+          snapshotRoot, createSnapshotOp.snapshotName);
      if (toAddRetryCache) {
        fsNamesys.addCacheEntryWithPayload(createSnapshotOp.rpcClientId,
            createSnapshotOp.rpcCallId, path);
@@ -601,8 +628,11 @@ public class FSEditLogLoader {
      DeleteSnapshotOp deleteSnapshotOp = (DeleteSnapshotOp) op;
      BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
      List<INode> removedINodes = new ChunkedArrayList<INode>();
+      final String snapshotRoot =
+          renameReservedPathsOnUpgrade(deleteSnapshotOp.snapshotRoot,
+              logVersion);
      fsNamesys.getSnapshotManager().deleteSnapshot(
-          deleteSnapshotOp.snapshotRoot, deleteSnapshotOp.snapshotName,
+          snapshotRoot, deleteSnapshotOp.snapshotName,
          collectedBlocks, removedINodes);
      fsNamesys.removeBlocksAndUpdateSafemodeTotal(collectedBlocks);
      collectedBlocks.clear();
@@ -617,8 +647,11 @@ public class FSEditLogLoader {
    }
    case OP_RENAME_SNAPSHOT: {
      RenameSnapshotOp renameSnapshotOp = (RenameSnapshotOp) op;
+      final String snapshotRoot =
+          renameReservedPathsOnUpgrade(renameSnapshotOp.snapshotRoot,
+              logVersion);
      fsNamesys.getSnapshotManager().renameSnapshot(
-          renameSnapshotOp.snapshotRoot, renameSnapshotOp.snapshotOldName,
+          snapshotRoot, renameSnapshotOp.snapshotOldName,
          renameSnapshotOp.snapshotNewName);
      
      if (toAddRetryCache) {
@@ -629,14 +662,19 @@ public class FSEditLogLoader {
    }
    case OP_ALLOW_SNAPSHOT: {
      AllowSnapshotOp allowSnapshotOp = (AllowSnapshotOp) op;
+      final String snapshotRoot =
+          renameReservedPathsOnUpgrade(allowSnapshotOp.snapshotRoot, logVersion);
      fsNamesys.getSnapshotManager().setSnapshottable(
-          allowSnapshotOp.snapshotRoot, false);
+          snapshotRoot, false);
      break;
    }
    case OP_DISALLOW_SNAPSHOT: {
      DisallowSnapshotOp disallowSnapshotOp = (DisallowSnapshotOp) op;
+      final String snapshotRoot =
+          renameReservedPathsOnUpgrade(disallowSnapshotOp.snapshotRoot,
+              logVersion);
      fsNamesys.getSnapshotManager().resetSnapshottable(
-          disallowSnapshotOp.snapshotRoot);
+          snapshotRoot);
      break;
    }
    case OP_SET_GENSTAMP_V2: {

+ 167 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -32,12 +32,13 @@ import java.security.DigestOutputStream;
 import java.security.MessageDigest;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
-import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -45,13 +46,15 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathIsNotDirectoryException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.LayoutFlags;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
-import org.apache.hadoop.hdfs.protocol.LayoutFlags;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
@@ -68,8 +71,10 @@ import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.StringUtils;
 
 import com.google.common.base.Preconditions;
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * Contains inner classes for reading or writing the on-disk format for
@@ -468,7 +473,8 @@ public class FSImageFormat {
     }
     
   /** 
-   * load fsimage files assuming only local names are stored
+   * load fsimage files assuming only local names are stored. Used when
+   * snapshots are not supported by the layout version.
    *   
    * @param numFiles number of files expected to be read
    * @param in image input stream
@@ -584,6 +590,8 @@ public class FSImageFormat {
     */
    private int loadDirectory(DataInput in, Counter counter) throws IOException {
      String parentPath = FSImageSerialization.readString(in);
+     // Rename .snapshot paths if we're doing an upgrade
+     parentPath = renameReservedPathsOnUpgrade(parentPath, getLayoutVersion());
      final INodeDirectory parent = INodeDirectory.valueOf(
          namesystem.dir.rootDir.getNode(parentPath, true), parentPath);
      return loadChildren(parent, in, counter);
@@ -643,11 +651,9 @@ public class FSImageFormat {
    */
   private void addToParent(INodeDirectory parent, INode child) {
     FSDirectory fsDir = namesystem.dir;
-    if (parent == fsDir.rootDir && FSDirectory.isReservedName(child)) {
-        throw new HadoopIllegalArgumentException("File name \""
-            + child.getLocalName() + "\" is reserved. Please "
-            + " change the name of the existing file or directory to another "
-            + "name before upgrading to this release.");
+    if (parent == fsDir.rootDir) {
+        child.setLocalName(renameReservedRootComponentOnUpgrade(
+            child.getLocalNameBytes(), getLayoutVersion()));
     }
     // NOTE: This does not update space counts for parents
     if (!parent.addChild(child)) {
@@ -684,7 +690,9 @@ public class FSImageFormat {
     public INode loadINodeWithLocalName(boolean isSnapshotINode,
         DataInput in, boolean updateINodeMap, Counter counter)
         throws IOException {
-      final byte[] localName = FSImageSerialization.readLocalName(in);
+      byte[] localName = FSImageSerialization.readLocalName(in);
+      localName =
+          renameReservedComponentOnUpgrade(localName, getLayoutVersion());
       INode inode = loadINode(localName, isSnapshotINode, in, counter);
       if (updateINodeMap
           && LayoutVersion.supports(Feature.ADD_INODE_ID, getLayoutVersion())) {
@@ -989,7 +997,156 @@ public class FSImageFormat {
       return snapshotMap.get(in.readInt());
     }
   }
-  
+
+  @VisibleForTesting
+  public static TreeMap<String, String> renameReservedMap =
+      new TreeMap<String, String>();
+
+  /**
+   * Use the default key-value pairs that will be used to determine how to
+   * rename reserved paths on upgrade.
+   */
+  @VisibleForTesting
+  public static void useDefaultRenameReservedPairs() {
+    renameReservedMap.clear();
+    for (String key: HdfsConstants.RESERVED_PATH_COMPONENTS) {
+      renameReservedMap.put(
+          key,
+          key + "." + LayoutVersion.getCurrentLayoutVersion() + "."
+              + "UPGRADE_RENAMED");
+    }
+  }
+
+  /**
+   * Set the key-value pairs that will be used to determine how to rename
+   * reserved paths on upgrade.
+   */
+  @VisibleForTesting
+  public static void setRenameReservedPairs(String renameReserved) {
+    // Clear and set the default values
+    useDefaultRenameReservedPairs();
+    // Overwrite with provided values
+    setRenameReservedMapInternal(renameReserved);
+  }
+
+  private static void setRenameReservedMapInternal(String renameReserved) {
+    Collection<String> pairs =
+        StringUtils.getTrimmedStringCollection(renameReserved);
+    for (String p : pairs) {
+      String[] pair = StringUtils.split(p, '/', '=');
+      Preconditions.checkArgument(pair.length == 2,
+          "Could not parse key-value pair " + p);
+      String key = pair[0];
+      String value = pair[1];
+      Preconditions.checkArgument(DFSUtil.isReservedPathComponent(key),
+          "Unknown reserved path " + key);
+      Preconditions.checkArgument(DFSUtil.isValidNameForComponent(value),
+          "Invalid rename path for " + key + ": " + value);
+      LOG.info("Will rename reserved path " + key + " to " + value);
+      renameReservedMap.put(key, value);
+    }
+  }
+
+  /**
+   * When upgrading from an old version, the filesystem could contain paths
+   * that are now reserved in the new version (e.g. .snapshot). This renames
+   * these new reserved paths to a user-specified value to avoid collisions
+   * with the reserved name.
+   * 
+   * @param path Old path potentially containing a reserved path
+   * @return New path with reserved path components renamed to user value
+   */
+  static String renameReservedPathsOnUpgrade(String path,
+      final int layoutVersion) {
+    final String oldPath = path;
+    // If any known LVs aren't supported, we're doing an upgrade
+    if (!LayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) {
+      String[] components = INode.getPathNames(path);
+      // Only need to worry about the root directory
+      if (components.length > 1) {
+        components[1] = DFSUtil.bytes2String(
+            renameReservedRootComponentOnUpgrade(
+                DFSUtil.string2Bytes(components[1]),
+                layoutVersion));
+        path = DFSUtil.strings2PathString(components);
+      }
+    }
+    if (!LayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) {
+      String[] components = INode.getPathNames(path);
+      // Special case the root path
+      if (components.length == 0) {
+        return path;
+      }
+      for (int i=0; i<components.length; i++) {
+        components[i] = DFSUtil.bytes2String(
+            renameReservedComponentOnUpgrade(
+                DFSUtil.string2Bytes(components[i]),
+                layoutVersion));
+      }
+      path = DFSUtil.strings2PathString(components);
+    }
+
+    if (!path.equals(oldPath)) {
+      LOG.info("Upgrade process renamed reserved path " + oldPath + " to "
+          + path);
+    }
+    return path;
+  }
+
+  private final static String RESERVED_ERROR_MSG = 
+      FSDirectory.DOT_RESERVED_PATH_PREFIX + " is a reserved path and "
+      + HdfsConstants.DOT_SNAPSHOT_DIR + " is a reserved path component in"
+      + " this version of HDFS. Please rollback and delete or rename"
+      + " this path, or upgrade with the "
+      + StartupOption.RENAMERESERVED.getName()
+      + " [key-value pairs]"
+      + " option to automatically rename these paths during upgrade.";
+
+  /**
+   * Same as {@link #renameReservedPathsOnUpgrade(String, int)}, but for a single
+   * byte array path component.
+   */
+  private static byte[] renameReservedComponentOnUpgrade(byte[] component,
+      final int layoutVersion) {
+    // If the LV doesn't support snapshots, we're doing an upgrade
+    if (!LayoutVersion.supports(Feature.SNAPSHOT, layoutVersion)) {
+      if (Arrays.equals(component, HdfsConstants.DOT_SNAPSHOT_DIR_BYTES)) {
+        Preconditions.checkArgument(
+            renameReservedMap != null &&
+            renameReservedMap.containsKey(HdfsConstants.DOT_SNAPSHOT_DIR),
+            RESERVED_ERROR_MSG);
+        component =
+            DFSUtil.string2Bytes(renameReservedMap
+                .get(HdfsConstants.DOT_SNAPSHOT_DIR));
+      }
+    }
+    return component;
+  }
+
+  /**
+   * Same as {@link #renameReservedPathsOnUpgrade(String, int)}, but for a
+   * single byte array path component that sits directly under the root
+   * (for example the /.reserved directory).
+   */
+  private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
+      final int layoutVersion) {
+    // If the LV doesn't support inode IDs, we're doing an upgrade
+    if (!LayoutVersion.supports(Feature.ADD_INODE_ID, layoutVersion)) {
+      if (Arrays.equals(component, FSDirectory.DOT_RESERVED)) {
+        Preconditions.checkArgument(
+            renameReservedMap != null &&
+            renameReservedMap.containsKey(FSDirectory.DOT_RESERVED_STRING),
+            RESERVED_ERROR_MSG);
+        final String renameString = renameReservedMap
+            .get(FSDirectory.DOT_RESERVED_STRING);
+        component =
+            DFSUtil.string2Bytes(renameString);
+        LOG.info("Renamed root path " + FSDirectory.DOT_RESERVED_STRING
+            + " to " + renameString);
+      }
+    }
+    return component;
+  }
+
   /**
   /**
    * A one-shot class responsible for writing an image file.
    * A one-shot class responsible for writing an image file.
    * The write() function should be called once, after which the getter
    * The write() function should be called once, after which the getter

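For orientation, a standalone Java sketch (invented class and method names, not part of this patch) of the per-component mapping that the new renameReserved* helpers perform; the real FSImageFormat code additionally consults the image layout version and uses the renameReservedMap pairs set via -renameReserved.

import java.util.Map;
import java.util.TreeMap;

// Illustration only: mimics the upgrade-time renaming idea with a plain map.
public class ReservedPathRenameSketch {
  // Plays the role of FSImageFormat.renameReservedMap.
  private static final Map<String, String> RENAMES = new TreeMap<String, String>();
  static {
    // The same pairs a user could pass via "-upgrade -renameReserved ...".
    RENAMES.put(".snapshot", ".user-snapshot");
    RENAMES.put(".reserved", ".my-reserved");
  }

  // Replaces every path component that collides with a reserved name.
  static String rename(String path) {
    StringBuilder sb = new StringBuilder();
    for (String component : path.split("/")) {
      if (component.isEmpty()) {
        continue;
      }
      String mapped = RENAMES.containsKey(component)
          ? RENAMES.get(component) : component;
      sb.append('/').append(mapped);
    }
    return sb.length() == 0 ? "/" : sb.toString();
  }

  public static void main(String[] args) {
    // Prints "/foo/.user-snapshot/bar".
    System.out.println(rename("/foo/.snapshot/bar"));
  }
}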
+ 18 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -1164,7 +1164,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     if (isInSafeMode()) {
       SafeModeException se = new SafeModeException(errorMsg, safeMode);
       if (haEnabled && haContext != null
-          && haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
+          && haContext.getState().getServiceState() == HAServiceState.ACTIVE
+          && shouldRetrySafeMode(this.safeMode)) {
         throw new RetriableException(se);
       } else {
         throw se;
@@ -1172,6 +1173,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       }
     }
    
+  /**
+   * We already know that the safemode is on. We will throw a RetriableException
+   * if the safemode is not manual or caused by low resource.
+   */
+  private boolean shouldRetrySafeMode(SafeModeInfo safeMode) {
+    if (safeMode == null) {
+      return false;
+    } else {
+      return !safeMode.isManual() && !safeMode.areResourcesLow();
+    }
+  }
+  
   public static Collection<URI> getNamespaceDirs(Configuration conf) {
     return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY);
   }
@@ -3803,7 +3816,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     final long diff = fileINode.getPreferredBlockSize() - commitBlock.getNumBytes();    
     if (diff > 0) {
       try {
-        String path = leaseManager.findPath(fileINode);
+        String path = fileINode.getFullPathName();
         dir.updateSpaceConsumed(path, 0, -diff*fileINode.getFileReplication());
       } catch (IOException e) {
         LOG.warn("Unexpected exception while updating disk space.", e);
@@ -4005,7 +4018,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   @VisibleForTesting
   String closeFileCommitBlocks(INodeFile pendingFile, BlockInfo storedBlock)
       throws IOException {
-    String src = leaseManager.findPath(pendingFile);
+    String src = pendingFile.getFullPathName();
 
     // commit the last block and complete it if it has minimum replicas
     commitOrCompleteLastBlock(pendingFile, storedBlock);
@@ -4027,7 +4040,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   @VisibleForTesting
   String persistBlocks(INodeFile pendingFile, boolean logRetryCache)
       throws IOException {
-    String src = leaseManager.findPath(pendingFile);
+    String src = pendingFile.getFullPathName();
     dir.persistBlocks(src, pendingFile, logRetryCache);
     return src;
   }
@@ -5955,7 +5968,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         .getDatanodeStorageInfos(newNodes, newStorageIDs);
     blockinfo.setExpectedLocations(storages);
 
-    String src = leaseManager.findPath(pendingFile);
+    String src = pendingFile.getFullPathName();
     dir.persistBlocks(src, pendingFile, logRetryCache);
   }
 

+ 0 - 36
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java

@@ -179,24 +179,6 @@ public class LeaseManager {
     return addLease(newHolder, src);
   }
 
-  /**
-   * Finds the pathname for the specified pendingFile
-   */
-  public synchronized String findPath(INodeFile pendingFile)
-      throws IOException {
-    FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
-    Preconditions.checkArgument(uc != null);
-    Lease lease = getLease(uc.getClientName());
-    if (lease != null) {
-      String src = lease.findPath(pendingFile);
-      if (src != null) {
-        return src;
-      }
-    }
-    throw new IOException("pendingFile (=" + pendingFile + ") not found."
-        + "(lease=" + lease + ")");
-  }
-
   /**
    * Renew the lease(s) held by the given client
    */
@@ -252,24 +234,6 @@ public class LeaseManager {
       return now() - lastUpdate > softLimit;
       return now() - lastUpdate > softLimit;
     }
     }
 
 
-    /**
-     * @return the path associated with the pendingFile and null if not found.
-     */
-    private String findPath(INodeFile pendingFile) {
-      try {
-        for (String src : paths) {
-          INode node = fsnamesystem.dir.getINode(src);
-          if (node == pendingFile
-              || (node.isFile() && node.asFile() == pendingFile)) {
-            return src;
-          }
-        }
-      } catch (UnresolvedLinkException e) {
-        throw new AssertionError("Lease files should reside on this FS");
-      }
-      return null;
-    }
-
     /** Does this lease contain any path? */
     boolean hasPath() {return !paths.isEmpty();}
 

+ 32 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -212,7 +212,9 @@ public class NameNode implements NameNodeStatusMXBean {
       + StartupOption.CLUSTERID.getName() + " cid ] ["
       + StartupOption.FORCE.getName() + "] ["
       + StartupOption.NONINTERACTIVE.getName() + "] ] | ["
-      + StartupOption.UPGRADE.getName() + "] | ["
+      + StartupOption.UPGRADE.getName() + 
+        " [" + StartupOption.CLUSTERID.getName() + " cid]" +
+        " [" + StartupOption.RENAMERESERVED.getName() + "<k-v pairs>] ] | ["
       + StartupOption.ROLLBACK.getName() + "] | ["
       + StartupOption.FINALIZE.getName() + "] | ["
       + StartupOption.IMPORT.getName() + "] | ["
@@ -1056,7 +1058,8 @@ public class NameNode implements NameNodeStatusMXBean {
     out.println(USAGE + "\n");
   }
 
-  private static StartupOption parseArguments(String args[]) {
+  @VisibleForTesting
+  static StartupOption parseArguments(String args[]) {
     int argsLen = (args == null) ? 0 : args.length;
     StartupOption startOpt = StartupOption.REGULAR;
     for(int i=0; i < argsLen; i++) {
@@ -1103,11 +1106,33 @@ public class NameNode implements NameNodeStatusMXBean {
         startOpt = StartupOption.CHECKPOINT;
       } else if (StartupOption.UPGRADE.getName().equalsIgnoreCase(cmd)) {
         startOpt = StartupOption.UPGRADE;
-        // might be followed by two args
-        if (i + 2 < argsLen
-            && args[i + 1].equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
-          i += 2;
-          startOpt.setClusterId(args[i]);
+        /* Can be followed by CLUSTERID with a required parameter or
+         * RENAMERESERVED with an optional parameter
+         */
+        while (i + 1 < argsLen) {
+          String flag = args[i + 1];
+          if (flag.equalsIgnoreCase(StartupOption.CLUSTERID.getName())) {
+            if (i + 2 < argsLen) {
+              i += 2;
+              startOpt.setClusterId(args[i]);
+            } else {
+              LOG.fatal("Must specify a valid cluster ID after the "
+                  + StartupOption.CLUSTERID.getName() + " flag");
+              return null;
+            }
+          } else if (flag.equalsIgnoreCase(StartupOption.RENAMERESERVED
+              .getName())) {
+            if (i + 2 < argsLen) {
+              FSImageFormat.setRenameReservedPairs(args[i + 2]);
+              i += 2;
+            } else {
+              FSImageFormat.useDefaultRenameReservedPairs();
+              i += 1;
+            }
+          } else {
+            LOG.fatal("Unknown upgrade flag " + flag);
+            return null;
+          }
         }
         }
       } else if (StartupOption.ROLLBACK.getName().equalsIgnoreCase(cmd)) {
         startOpt = StartupOption.ROLLBACK;

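A rough usage sketch of the new upgrade-flag parsing (a hypothetical test class, not part of this change; it assumes it lives in the org.apache.hadoop.hdfs.server.namenode package so the now package-visible parseArguments is reachable).

package org.apache.hadoop.hdfs.server.namenode;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.junit.Test;

public class TestUpgradeArgumentSketch {
  @Test
  public void upgradeFlagsAreParsed() {
    // -upgrade alone is still accepted.
    assertEquals(StartupOption.UPGRADE,
        NameNode.parseArguments(new String[] { "-upgrade" }));
    // -upgrade followed by -renameReserved and custom key-value pairs.
    assertEquals(StartupOption.UPGRADE,
        NameNode.parseArguments(new String[] {
            "-upgrade", "-renameReserved",
            ".snapshot=.my-snapshot,.reserved=.my-reserved" }));
    // An unknown flag after -upgrade makes parsing fail (returns null).
    assertNull(NameNode.parseArguments(new String[] { "-upgrade", "-bogus" }));
  }
}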
+ 7 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java

@@ -982,13 +982,18 @@ class NameNodeRpcServer implements NamenodeProtocols {
            + "from " + nodeReg + ", reports.length=" + reports.length);
     }
     final BlockManager bm = namesystem.getBlockManager(); 
+    boolean hasStaleStorages = true;
     for(StorageBlockReport r : reports) {
       final BlockListAsLongs blocks = new BlockListAsLongs(r.getBlocks());
-      bm.processReport(nodeReg, r.getStorage(), poolId, blocks);
+      hasStaleStorages = bm.processReport(nodeReg, r.getStorage(), poolId, blocks);
     }
 
-    if (nn.getFSImage().isUpgradeFinalized() && !nn.isStandbyState())
+    if (nn.getFSImage().isUpgradeFinalized() &&
+        !nn.isStandbyState() &&
+        !hasStaleStorages) {
       return new FinalizeCommand(poolId);
       return new FinalizeCommand(poolId);
+    }
+
     return null;
     return null;
   }
   }
 
 

+ 2 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java

@@ -962,9 +962,8 @@ public class CacheAdmin extends Configured implements Tool {
       if (numResults > 0) { 
         System.out.print(listing);
       }
-      // If there are no results, we return 1 (failure exit code);
-      // otherwise we return 0 (success exit code).
-      return (numResults == 0) ? 1 : 0;
+      // If listing the pools succeeds, we return 0 (the success exit code).
+      return 0;
     }
   }
 

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -188,6 +188,9 @@ public class WebHdfsFileSystem extends FileSystem
       int maxFailoverAttempts = conf.getInt(
           DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
           DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
+      int maxRetryAttempts = conf.getInt(
+          DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY,
+          DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
       int failoverSleepBaseMillis = conf.getInt(
           DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
           DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
@@ -197,7 +200,7 @@ public class WebHdfsFileSystem extends FileSystem
 
       this.retryPolicy = RetryPolicies
           .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,
-              maxFailoverAttempts, failoverSleepBaseMillis,
+              maxFailoverAttempts, maxRetryAttempts, failoverSleepBaseMillis,
               failoverSleepMaxMillis);
     }
 

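For context, a hedged sketch of how the policy above is assembled once the new maxRetryAttempts value is threaded in. WebHdfsFileSystem does this internally, so applications would not normally build it by hand; the *_SLEEPTIME_MAX constant names below are assumed by analogy with the *_SLEEPTIME_BASE ones shown in the hunk.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;

public class WebHdfsRetryPolicySketch {
  static RetryPolicy build(Configuration conf) {
    int maxFailoverAttempts = conf.getInt(
        DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
        DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
    int maxRetryAttempts = conf.getInt(
        DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY,
        DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT);
    int failoverSleepBaseMillis = conf.getInt(
        DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
        DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
    int failoverSleepMaxMillis = conf.getInt(
        DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
        DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT);
    // Same call shape as the updated WebHdfsFileSystem constructor above.
    return RetryPolicies.failoverOnNetworkException(
        RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts,
        maxRetryAttempts, failoverSleepBaseMillis, failoverSleepMaxMillis);
  }
}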
+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -482,6 +482,20 @@
   <description>Delay for first block report in seconds.</description>
 </property>
 
+<property>
+    <name>dfs.blockreport.split.threshold</name>
+    <value>1000000</value>
+    <description>If the number of blocks on the DataNode is below this
+    threshold then it will send block reports for all Storage Directories
+    in a single message.
+
+    If the number of blocks exceeds this threshold then the DataNode will
+    send block reports for each Storage Directory in separate messages.
+
+    Set to zero to always split.
+    </description>
+</property>
+
 <property>
   <name>dfs.datanode.directoryscan.interval</name>
   <value>21600</value>

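A minimal sketch of overriding the new property from client or test code; the property name comes from the hdfs-default.xml entry above, and setting it to zero forces a separate block report per storage directory, per the description.

import org.apache.hadoop.conf.Configuration;

public class BlockReportSplitConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Zero means "always split" according to the description above.
    conf.setLong("dfs.blockreport.split.threshold", 0L);
    System.out.println(conf.get("dfs.blockreport.split.threshold"));
  }
}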
+ 28 - 1
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm

@@ -435,7 +435,7 @@ HDFS Users Guide
    state it was in before the upgrade. HDFS upgrade is described in more
    detail in {{{http://wiki.apache.org/hadoop/Hadoop_Upgrade}Hadoop Upgrade}}
    Wiki page. HDFS can have one such backup at a time. Before upgrading,
-   administrators need to remove existing backupusing bin/hadoop dfsadmin
+   administrators need to remove existing backup using bin/hadoop dfsadmin
    <<<-finalizeUpgrade>>> command. The following briefly describes the
    typical upgrade procedure:
 
@@ -459,6 +459,33 @@ HDFS Users Guide
 
           * start the cluster with rollback option. (<<<bin/start-dfs.sh -rollback>>>).
 
+    When upgrading to a new version of HDFS, it is necessary to rename or
+    delete any paths that are reserved in the new version of HDFS. If the
+    NameNode encounters a reserved path during upgrade, it will print an
+    error like the following:
+
+    <<< /.reserved is a reserved path and .snapshot is a
+    reserved path component in this version of HDFS. Please rollback and delete
+    or rename this path, or upgrade with the -renameReserved [key-value pairs]
+    option to automatically rename these paths during upgrade.>>>
+
+    Specifying <<<-upgrade -renameReserved [optional key-value pairs]>>> causes
+    the NameNode to automatically rename any reserved paths found during
+    startup. For example, to rename all paths named <<<.snapshot>>> to
+    <<<.my-snapshot>>> and <<<.reserved>>> to <<<.my-reserved>>>, a user would
+    specify <<<-upgrade -renameReserved
+    .snapshot=.my-snapshot,.reserved=.my-reserved>>>.
+
+    If no key-value pairs are specified with <<<-renameReserved>>>, the
+    NameNode will then suffix reserved paths with
+    <<<.<LAYOUT-VERSION>.UPGRADE_RENAMED>>>, e.g.
+    <<<.snapshot.-51.UPGRADE_RENAMED>>>.
+
+    There are some caveats to this renaming process. It is recommended,
+    if possible, to first run <<<hdfs dfsadmin -saveNamespace>>> before
+    upgrading, because data inconsistency can result if an edit log operation
+    refers to the destination of an automatically renamed file.
+
 * File Permissions and Security
 
    The file permissions are designed to be similar to file permissions on

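A small illustrative Java snippet of the default renaming pattern described above; the helper name is invented, and -51 is simply the example layout version used in the text.

public class DefaultRenameSuffixSketch {
  // Reproduces the documented ".<LAYOUT-VERSION>.UPGRADE_RENAMED" suffix.
  static String defaultRename(String reserved, int layoutVersion) {
    return reserved + "." + layoutVersion + "." + "UPGRADE_RENAMED";
  }

  public static void main(String[] args) {
    // Prints ".snapshot.-51.UPGRADE_RENAMED".
    System.out.println(defaultRename(".snapshot", -51));
  }
}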
+ 14 - 7
hadoop-hdfs-project/hadoop-hdfs/src/site/xdoc/HdfsSnapshots.xml

@@ -20,7 +20,7 @@
   xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
 
   <properties>
-    <title>HFDS Snapshots</title>
+    <title>HDFS Snapshots</title>
   </properties>
 
   <body>
@@ -99,15 +99,22 @@
     <li>Copying a file from snapshot <code>s0</code>:
       <source>hdfs dfs -cp /foo/.snapshot/s0/bar /tmp</source></li>
   </ul>
-  <p>
-    <b>Note</b> that the name ".snapshot" is now a reserved file name in HDFS
-    so that users cannot create a file/directory with ".snapshot" as the name.
-    If ".snapshot" is used in a previous version of HDFS, it must be renamed before upgrade;
-    otherwise, upgrade will fail. 
-  </p>
   </subsection>
   </section>
 
+  <section name="Upgrading to a version of HDFS with snapshots" id="Upgrade">
+
+  <p>
+    The HDFS snapshot feature introduces a new reserved path name used to
+    interact with snapshots: <tt>.snapshot</tt>. When upgrading from an
+    older version of HDFS, existing paths named <tt>.snapshot</tt> need
+    to first be renamed or deleted to avoid conflicting with the reserved path.
+    See the upgrade section in
+    <a href="HdfsUserGuide.html#Upgrade_and_Rollback">the HDFS user guide</a>
+    for more information.  </p>
+
+  </section>
+
   <section name="Snapshot Operations" id="SnapshotOperations">
   <subsection name="Administrator Operations" id="AdministratorOperations">
   <p>

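As a complement to the shell example above, a hedged Java sketch that reads a file through the reserved .snapshot path via the ordinary FileSystem API; it assumes a snapshottable directory /foo with an existing snapshot s0, as in the documentation example.

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFromSnapshotSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Snapshot contents are addressed under the reserved .snapshot directory.
    Path inSnapshot = new Path("/foo/.snapshot/s0/bar");
    BufferedReader in = new BufferedReader(
        new InputStreamReader(fs.open(inSnapshot)));
    try {
      System.out.println(in.readLine());
    } finally {
      in.close();
    }
  }
}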
+ 85 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java

@@ -27,6 +27,7 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.TreeMap;
@@ -43,7 +44,9 @@ import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
 import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Logger;
 import org.junit.Test;
@@ -67,6 +70,7 @@ public class TestDFSUpgradeFromImage {
   private static final String HADOOP_DFS_DIR_TXT = "hadoop-dfs-dir.txt";
   private static final String HADOOP22_IMAGE = "hadoop-22-dfs-dir.tgz";
   private static final String HADOOP1_BBW_IMAGE = "hadoop1-bbw.tgz";
+  private static final String HADOOP2_RESERVED_IMAGE = "hadoop-2-reserved.tgz";
 
   private static class ReferenceFileInfo {
     String path;
@@ -320,6 +324,87 @@ public class TestDFSUpgradeFromImage {
       assertEquals("Upgrade did not fail with bad MD5", 1, md5failures);
     }
   }
+
+  /**
+   * Test upgrade from 2.0 image with a variety of .snapshot and .reserved
+   * paths to test renaming on upgrade
+   */
+  @Test
+  public void testUpgradeFromRel2ReservedImage() throws IOException {
+    unpackStorage(HADOOP2_RESERVED_IMAGE);
+    MiniDFSCluster cluster = null;
+    // Try it once without setting the upgrade flag to ensure it fails
+    try {
+      cluster =
+          new MiniDFSCluster.Builder(new Configuration())
+              .format(false)
+              .startupOption(StartupOption.UPGRADE)
+              .numDataNodes(0).build();
+    } catch (IllegalArgumentException e) {
+      GenericTestUtils.assertExceptionContains(
+          "reserved path component in this version",
+          e);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+    // Try it again with a custom rename string
+    try {
+      FSImageFormat.setRenameReservedPairs(
+          ".snapshot=.user-snapshot," +
+          ".reserved=.my-reserved");
+      cluster =
+          new MiniDFSCluster.Builder(new Configuration())
+              .format(false)
+              .startupOption(StartupOption.UPGRADE)
+              .numDataNodes(0).build();
+      // Make sure the paths were renamed as expected
+      DistributedFileSystem dfs = cluster.getFileSystem();
+      ArrayList<Path> toList = new ArrayList<Path>();
+      ArrayList<String> found = new ArrayList<String>();
+      toList.add(new Path("/"));
+      while (!toList.isEmpty()) {
+        Path p = toList.remove(0);
+        FileStatus[] statuses = dfs.listStatus(p);
+        for (FileStatus status: statuses) {
+          final String path = status.getPath().toUri().getPath();
+          System.out.println("Found path " + path);
+          found.add(path);
+          if (status.isDirectory()) {
+            toList.add(status.getPath());
+          }
+        }
+      }
+      String[] expected = new String[] {
+          "/edits",
+          "/edits/.reserved",
+          "/edits/.user-snapshot",
+          "/edits/.user-snapshot/editsdir",
+          "/edits/.user-snapshot/editsdir/editscontents",
+          "/edits/.user-snapshot/editsdir/editsdir2",
+          "/image",
+          "/image/.reserved",
+          "/image/.user-snapshot",
+          "/image/.user-snapshot/imagedir",
+          "/image/.user-snapshot/imagedir/imagecontents",
+          "/image/.user-snapshot/imagedir/imagedir2",
+          "/.my-reserved",
+          "/.my-reserved/edits-touch",
+          "/.my-reserved/image-touch"
+      };
+
+      for (String s: expected) {
+        assertTrue("Did not find expected path " + s, found.contains(s));
+      }
+      assertEquals("Found an unexpected path while listing filesystem",
+          found.size(), expected.length);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
     
   static void recoverAllLeases(DFSClient dfs, 
       Path path) throws IOException {

+ 86 - 42
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java

@@ -19,16 +19,19 @@ package org.apache.hadoop.hdfs;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import com.google.common.base.Supplier;
+
 import java.io.InputStream;
 import java.io.PrintWriter;
-import java.net.InetSocketAddress;
-import java.net.Socket;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -37,10 +40,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.net.Peer;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
-import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
-import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.junit.After;
 import org.junit.Before;
@@ -51,10 +52,7 @@ import com.google.common.io.NullOutputStream;
 public class TestDataTransferKeepalive {
   Configuration conf = new HdfsConfiguration();
   private MiniDFSCluster cluster;
-  private FileSystem fs;
-  private InetSocketAddress dnAddr;
   private DataNode dn;
-  private DFSClient dfsClient;
   private static Path TEST_FILE = new Path("/test");
   
   private static final int KEEPALIVE_TIMEOUT = 1000;
@@ -69,15 +67,7 @@ public class TestDataTransferKeepalive {
     
     cluster = new MiniDFSCluster.Builder(conf)
       .numDataNodes(1).build();
-    fs = cluster.getFileSystem();
-    dfsClient = ((DistributedFileSystem)fs).dfs;
-    dfsClient.peerCache.clear();
-
-    String poolId = cluster.getNamesystem().getBlockPoolId();
     dn = cluster.getDataNodes().get(0);
-    DatanodeRegistration dnReg = DataNodeTestUtils.getDNRegistrationForBP(
-        dn, poolId);
-    dnAddr = NetUtils.createSocketAddr(dnReg.getXferAddr());
   }
   
   @After
@@ -90,34 +80,86 @@ public class TestDataTransferKeepalive {
    * its configured keepalive timeout.
    */
   @Test(timeout=30000)
-  public void testKeepaliveTimeouts() throws Exception {
+  public void testDatanodeRespectsKeepAliveTimeout() throws Exception {
+    Configuration clientConf = new Configuration(conf);
+    // Set a client socket cache expiry time much longer than 
+    // the datanode-side expiration time.
+    final long CLIENT_EXPIRY_MS = 60000L;
+    clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
+    PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT, CLIENT_EXPIRY_MS);
+    DistributedFileSystem fs =
+        (DistributedFileSystem)FileSystem.get(cluster.getURI(),
+            clientConf);
+
     DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
 
     // Clients that write aren't currently re-used.
-    assertEquals(0, dfsClient.peerCache.size());
+    assertEquals(0, fs.dfs.peerCache.size());
     assertXceiverCount(0);
 
     // Reads the file, so we should get a
     // cached socket, and should have an xceiver on the other side.
     DFSTestUtil.readFile(fs, TEST_FILE);
-    assertEquals(1, dfsClient.peerCache.size());
+    assertEquals(1, fs.dfs.peerCache.size());
     assertXceiverCount(1);
 
     // Sleep for a bit longer than the keepalive timeout
     // and make sure the xceiver died.
-    Thread.sleep(KEEPALIVE_TIMEOUT * 2);
+    Thread.sleep(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT + 1);
     assertXceiverCount(0);
     
     // The socket is still in the cache, because we don't
     // notice that it's closed until we try to read
     // from it again.
-    assertEquals(1, dfsClient.peerCache.size());
+    assertEquals(1, fs.dfs.peerCache.size());
     
     // Take it out of the cache - reading should
     // give an EOF.
-    Peer peer = dfsClient.peerCache.get(dn.getDatanodeId(), false);
+    Peer peer = fs.dfs.peerCache.get(dn.getDatanodeId(), false);
     assertNotNull(peer);
     assertEquals(-1, peer.getInputStream().read());
+    PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT,
+        DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT);
+  }
+
+  /**
+   * Test that the client respects its keepalive timeout.
+   */
+  @Test(timeout=30000)
+  public void testClientResponsesKeepAliveTimeout() throws Exception {
+    Configuration clientConf = new Configuration(conf);
+    // Set a client socket cache expiry time much shorter than 
+    // the datanode-side expiration time.
+    final long CLIENT_EXPIRY_MS = 10L;
+    clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
+    PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT, CLIENT_EXPIRY_MS);
+    DistributedFileSystem fs =
+        (DistributedFileSystem)FileSystem.get(cluster.getURI(),
+            clientConf);
+
+    DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
+
+    // Clients that write aren't currently re-used.
+    assertEquals(0, fs.dfs.peerCache.size());
+    assertXceiverCount(0);
+
+    // Reads the file, so we should get a
+    // cached socket, and should have an xceiver on the other side.
+    DFSTestUtil.readFile(fs, TEST_FILE);
+    assertEquals(1, fs.dfs.peerCache.size());
+    assertXceiverCount(1);
+
+    // Sleep for a bit longer than the client keepalive timeout.
+    Thread.sleep(CLIENT_EXPIRY_MS + 1);
+    
+    // Taking out a peer which is expired should give a null.
+    Peer peer = fs.dfs.peerCache.get(dn.getDatanodeId(), false);
+    assertTrue(peer == null);
+
+    // The socket cache is now empty.
+    assertEquals(0, fs.dfs.peerCache.size());
+    PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT,
+        DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT);
   }
   }
 
   /**
@@ -125,8 +167,17 @@ public class TestDataTransferKeepalive {
    * read bytes off the stream quickly. The datanode should time out sending the
    * chunks and the transceiver should die, even if it has a long keepalive.
    */
-  @Test(timeout=30000)
+  @Test(timeout=300000)
   public void testSlowReader() throws Exception {
+    // Set a client socket cache expiry time much longer than 
+    // the datanode-side expiration time.
+    final long CLIENT_EXPIRY_MS = 600000L;
+    Configuration clientConf = new Configuration(conf);
+    clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
+    PeerCache.setInstance(DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT, CLIENT_EXPIRY_MS);
+    DistributedFileSystem fs =
+        (DistributedFileSystem)FileSystem.get(cluster.getURI(),
+            clientConf);
     // Restart the DN with a shorter write timeout.
     DataNodeProperties props = cluster.stopDataNode(0);
     props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY,
@@ -134,38 +185,31 @@ public class TestDataTransferKeepalive {
     props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY,
         120000);
     assertTrue(cluster.restartDataNode(props, true));
+    dn = cluster.getDataNodes().get(0);
     // Wait for heartbeats to avoid a startup race where we
     // try to write the block while the DN is still starting.
     cluster.triggerHeartbeats();
     
-    dn = cluster.getDataNodes().get(0);
-    
     DFSTestUtil.createFile(fs, TEST_FILE, 1024*1024*8L, (short)1, 0L);
     FSDataInputStream stm = fs.open(TEST_FILE);
-    try {
-      stm.read();
-      assertXceiverCount(1);
-
-      // Poll for 0 running xceivers.  Allow up to 5 seconds for some slack.
-      long totalSleepTime = 0;
-      long sleepTime = WRITE_TIMEOUT + 100;
-      while (getXceiverCountWithoutServer() > 0 && totalSleepTime < 5000) {
-        Thread.sleep(sleepTime);
-        totalSleepTime += sleepTime;
-        sleepTime = 100;
+    stm.read();
+    assertXceiverCount(1);
+
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      public Boolean get() {
+        // DN should time out in sendChunks, and this should force
+        // the xceiver to exit.
+        return getXceiverCountWithoutServer() == 0;
       }
+    }, 500, 50000);
 
 
-      // DN should time out in sendChunks, and this should force
-      // the xceiver to exit.
-      assertXceiverCount(0);
-    } finally {
-      IOUtils.closeStream(stm);
-    }
+    IOUtils.closeStream(stm);
   }
   
   @Test(timeout=30000)
   public void testManyClosedSocketsInCache() throws Exception {
     // Make a small file
+    DistributedFileSystem fs = cluster.getFileSystem();
     DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L);
 
     // Insert a bunch of dead sockets in the cache, by opening

+ 54 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHttpPolicy.java

@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import static org.apache.hadoop.http.HttpConfig.Policy.HTTP_AND_HTTPS;
+import static org.apache.hadoop.http.HttpConfig.Policy.HTTP_ONLY;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Assert;
+import org.junit.Test;
+
+public final class TestHttpPolicy {
+
+  @Test(expected = HadoopIllegalArgumentException.class)
+  public void testInvalidPolicyValue() {
+    Configuration conf = new Configuration();
+    conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, "invalid");
+    DFSUtil.getHttpPolicy(conf);
+  }
+
+  @Test
+  public void testDeprecatedConfiguration() {
+    Configuration conf = new Configuration(false);
+    Assert.assertSame(HTTP_ONLY, DFSUtil.getHttpPolicy(conf));
+
+    conf.setBoolean(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY, true);
+    Assert.assertSame(HTTP_AND_HTTPS, DFSUtil.getHttpPolicy(conf));
+
+    conf = new Configuration(false);
+    conf.setBoolean(DFSConfigKeys.HADOOP_SSL_ENABLED_KEY, true);
+    Assert.assertSame(HTTP_AND_HTTPS, DFSUtil.getHttpPolicy(conf));
+
+    conf = new Configuration(false);
+    conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, HTTP_ONLY.name());
+    conf.setBoolean(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY, true);
+    Assert.assertSame(HTTP_ONLY, DFSUtil.getHttpPolicy(conf));
+  }
+}

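TestHttpPolicy above pins down the precedence rules in DFSUtil.getHttpPolicy: either of the deprecated switches DFS_HTTPS_ENABLE_KEY or HADOOP_SSL_ENABLED_KEY alone yields HTTP_AND_HTTPS, while an explicit DFS_HTTP_POLICY_KEY wins outright. A rough usage sketch under those assumptions (the HttpPolicySketch class is hypothetical; the keys and calls are the ones exercised by the test):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.http.HttpConfig;

public class HttpPolicySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // The deprecated boolean alone would resolve to HTTP_AND_HTTPS ...
    conf.setBoolean(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY, true);
    // ... but the explicit policy key takes precedence, as the test asserts.
    conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, HttpConfig.Policy.HTTP_ONLY.name());
    HttpConfig.Policy effective = DFSUtil.getHttpPolicy(conf);
    System.out.println("Effective policy: " + effective); // HTTP_ONLY
  }
}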
+ 104 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/client/TestShortCircuitSharedMemorySegment.java

@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.client;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory;
+import org.apache.hadoop.hdfs.client.ShortCircuitSharedMemorySegment.Slot;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.Assert;
+
+public class TestShortCircuitSharedMemorySegment {
+  public static final Log LOG =
+      LogFactory.getLog(TestShortCircuitSharedMemorySegment.class);
+  
+  private static final File TEST_BASE =
+      new File(System.getProperty("test.build.data", "/tmp"));
+
+  @Before
+  public void before() {
+    Assume.assumeTrue(NativeIO.isAvailable());
+    Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
+  }
+
+  @Test(timeout=60000)
+  public void testStartupShutdown() throws Exception {
+    File path = new File(TEST_BASE, "testStartupShutdown");
+    path.mkdirs();
+    SharedFileDescriptorFactory factory =
+        new SharedFileDescriptorFactory("shm_", path.getAbsolutePath());
+    FileInputStream stream = factory.createDescriptor(4096);
+    ShortCircuitSharedMemorySegment shm = 
+        new ShortCircuitSharedMemorySegment(stream);
+    shm.close();
+    stream.close();
+    FileUtil.fullyDelete(path);
+  }
+
+  @Test(timeout=60000)
+  public void testAllocateSlots() throws Exception {
+    File path = new File(TEST_BASE, "testAllocateSlots");
+    path.mkdirs();
+    SharedFileDescriptorFactory factory =
+        new SharedFileDescriptorFactory("shm_", path.getAbsolutePath());
+    FileInputStream stream = factory.createDescriptor(4096);
+    ShortCircuitSharedMemorySegment shm = 
+        new ShortCircuitSharedMemorySegment(stream);
+    int numSlots = 0;
+    ArrayList<Slot> slots = new ArrayList<Slot>();
+    while (true) {
+      Slot slot = shm.allocateNextSlot();
+      if (slot == null) {
+        LOG.info("allocated " + numSlots + " slots before running out.");
+        break;
+      }
+      slots.add(slot);
+      numSlots++;
+    }
+    int slotIdx = 0;
+    for (Slot slot : slots) {
+      Assert.assertFalse(slot.addAnchor());
+      Assert.assertEquals(slotIdx++, slot.getIndex());
+    }
+    for (Slot slot : slots) {
+      slot.makeAnchorable();
+    }
+    for (Slot slot : slots) {
+      Assert.assertTrue(slot.addAnchor());
+    }
+    for (Slot slot : slots) {
+      slot.removeAnchor();
+    }
+    shm.close();
+    for (Slot slot : slots) {
+      slot.close();
+    }
+    stream.close();
+    FileUtil.fullyDelete(path);
+  }
+}

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java

@@ -65,7 +65,7 @@ public class TestBalancerWithNodeGroup {
 
 
   ClientProtocol client;
 
 
-  static final long TIMEOUT = 20000L; //msec
+  static final long TIMEOUT = 40000L; //msec
   static final double CAPACITY_ALLOWED_VARIANCE = 0.005;  // 0.5%
   static final double BALANCE_ALLOWED_VARIANCE = 0.11;    // 10%+delta
   static final int DEFAULT_BLOCK_SIZE = 10;

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java

@@ -124,6 +124,8 @@ public class TestReplicationPolicyWithNodeGroup {
     CONF.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY, 
         NetworkTopologyWithNodeGroup.class.getName());
     
     
+    CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, true);
+    
     File baseDir = PathUtils.getTestDir(TestReplicationPolicyWithNodeGroup.class);
     
     
     CONF.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,

+ 71 - 165
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java → hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java

@@ -52,7 +52,6 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
-import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
@@ -69,20 +68,16 @@ import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 
 
 /**
- * This test simulates a variety of situations when blocks are being
- * intentionally corrupted, unexpectedly modified, and so on before a block
- * report is happening.
+ * This is the base class for simulating a variety of situations
+ * when blocks are being intentionally corrupted, unexpectedly modified,
+ * and so on before a block report is happening.
  *
- * For each test case it runs two variations:
- *  #1 - For a given DN, the first variation sends block reports for all
- *       storages in a single call to the NN.
- *  #2 - For a given DN, the second variation sends block reports for each
- *       storage in a separate call.
- *
- * The behavior should be the same in either variation.
+ * By overriding {@link #sendBlockReports}, derived classes can test
+ * different variations of how block reports are split across storages
+ * and messages.
  */
-public class TestBlockReport {
-  public static final Log LOG = LogFactory.getLog(TestBlockReport.class);
+public abstract class BlockReportTestBase {
+  public static final Log LOG = LogFactory.getLog(BlockReportTestBase.class);
 
 
   private static short REPL_FACTOR = 1;
   private static final int RAND_LIMIT = 2000;
@@ -91,12 +86,11 @@ public class TestBlockReport {
   private static final int DN_N0 = 0;
   private static final int FILE_START = 0;
 
 
-  static final int BLOCK_SIZE = 1024;
-  static final int NUM_BLOCKS = 10;
-  static final int FILE_SIZE = NUM_BLOCKS * BLOCK_SIZE + 1;
-  static String bpid;
+  private static final int BLOCK_SIZE = 1024;
+  private static final int NUM_BLOCKS = 10;
+  private static final int FILE_SIZE = NUM_BLOCKS * BLOCK_SIZE + 1;
 
 
-  private MiniDFSCluster cluster;
+  protected MiniDFSCluster cluster;
   private DistributedFileSystem fs;
 
 
   private static Random rand = new Random(RAND_LIMIT);
@@ -112,8 +106,7 @@ public class TestBlockReport {
   public void startUpCluster() throws IOException {
     REPL_FACTOR = 1; //Reset if case a test has modified the value
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL_FACTOR).build();
-    fs = (DistributedFileSystem) cluster.getFileSystem();
-    bpid = cluster.getNamesystem().getBlockPoolId();
+    fs = cluster.getFileSystem();
   }
 
 
   @After
@@ -123,6 +116,15 @@ public class TestBlockReport {
     cluster.shutdown();
   }
 
 
+  protected static void resetConfiguration() {
+    conf = new Configuration();
+    int customPerChecksumSize = 512;
+    int customBlockSize = customPerChecksumSize * 3;
+    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
+    conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DN_RESCAN_INTERVAL);
+  }
+
   // Generate a block report, optionally corrupting the generation
   // stamp and/or length of one block.
   private static StorageBlockReport[] getBlockReports(
@@ -172,106 +174,11 @@ public class TestBlockReport {
    * @param dnR
    * @param poolId
    * @param reports
-   * @param needtoSplit
    * @throws IOException
    */
-  private void sendBlockReports(DatanodeRegistration dnR, String poolId,
-      StorageBlockReport[] reports, boolean needtoSplit) throws IOException {
-    if (!needtoSplit) {
-      LOG.info("Sending combined block reports for " + dnR);
-      cluster.getNameNodeRpc().blockReport(dnR, poolId, reports);
-    } else {
-      for (StorageBlockReport report : reports) {
-        LOG.info("Sending block report for storage " + report.getStorage().getStorageID());
-        StorageBlockReport[] singletonReport = { report };
-        cluster.getNameNodeRpc().blockReport(dnR, poolId, singletonReport);
-      }
-    }
-  }
+  protected abstract void sendBlockReports(DatanodeRegistration dnR, String poolId,
+      StorageBlockReport[] reports) throws IOException;
 
 
-  /**
-   * Test variations blockReport_01 through blockReport_09 with combined
-   * and split block reports.
-   */
-  @Test
-  public void blockReportCombined_01() throws IOException {
-    blockReport_01(false);
-  }
-
-  @Test
-  public void blockReportSplit_01() throws IOException {
-    blockReport_01(true);
-  }
-
-  @Test
-  public void blockReportCombined_02() throws IOException {
-    blockReport_02(false);
-  }
-
-  @Test
-  public void blockReportSplit_02() throws IOException {
-    blockReport_02(true);
-  }
-
-  @Test
-  public void blockReportCombined_03() throws IOException {
-    blockReport_03(false);
-  }
-
-  @Test
-  public void blockReportSplit_03() throws IOException {
-    blockReport_03(true);
-  }
-
-  @Test
-  public void blockReportCombined_04() throws IOException {
-    blockReport_04(false);
-  }
-
-  @Test
-  public void blockReportSplit_04() throws IOException {
-    blockReport_04(true);
-  }
-
-  @Test
-  public void blockReportCombined_06() throws Exception {
-    blockReport_06(false);
-  }
-
-  @Test
-  public void blockReportSplit_06() throws Exception {
-    blockReport_06(true);
-  }
-
-  @Test
-  public void blockReportCombined_07() throws Exception {
-    blockReport_07(false);
-  }
-
-  @Test
-  public void blockReportSplit_07() throws Exception {
-    blockReport_07(true);
-  }
-
-  @Test
-  public void blockReportCombined_08() throws Exception {
-    blockReport_08(false);
-  }
-
-  @Test
-  public void blockReportSplit_08() throws Exception {
-    blockReport_08(true);
-  }
-
-  @Test
-  public void blockReportCombined_09() throws Exception {
-    blockReport_09(false);
-  }
-
-  @Test
-  public void blockReportSplit_09() throws Exception {
-    blockReport_09(true);
-  }
   /**
    * Test write a file, verifies and closes it. Then the length of the blocks
    * are messed up and BlockReport is forced.
@@ -279,7 +186,8 @@ public class TestBlockReport {
    *
    * @throws java.io.IOException on an error
    */
-  private void blockReport_01(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_01() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
 
 
@@ -312,7 +220,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
 
 
     List<LocatedBlock> blocksAfterReport =
       DFSTestUtil.getAllBlocks(fs.open(filePath));
@@ -338,7 +246,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of errors
    */
-  private void blockReport_02(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_02() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     LOG.info("Running test " + METHOD_NAME);
 
 
@@ -393,7 +302,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn0.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn0, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
 
 
     BlockManagerTestUtil.getComputedDatanodeWork(cluster.getNamesystem()
         .getBlockManager());
@@ -414,17 +323,18 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_03(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_03() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     ArrayList<Block> blocks = writeFile(METHOD_NAME, FILE_SIZE, filePath);
-    
+
     // all blocks belong to the same file, hence same BP
     DataNode dn = cluster.getDataNodes().get(DN_N0);
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
 
     assertThat("Wrong number of corrupt blocks",
@@ -441,7 +351,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_04(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_04() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     DFSTestUtil.createFile(fs, filePath,
@@ -459,7 +370,7 @@ public class TestBlockReport {
 
 
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
 
     assertThat("Wrong number of corrupt blocks",
@@ -476,7 +387,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_06(boolean splitBlockReports) throws Exception {
+  @Test(timeout=300000)
+  public void blockReport_06() throws Exception {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -489,7 +401,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
     assertEquals("Wrong number of PendingReplication Blocks",
       0, cluster.getNamesystem().getUnderReplicatedBlocks());
@@ -508,7 +420,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_07(boolean splitBlockReports) throws Exception {
+  @Test(timeout=300000)
+  public void blockReport_07() throws Exception {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -522,7 +435,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
 
     assertThat("Wrong number of corrupt blocks",
@@ -533,7 +446,7 @@ public class TestBlockReport {
                cluster.getNamesystem().getPendingReplicationBlocks(), is(0L));
 
 
     reports = getBlockReports(dn, poolId, true, true);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
 
     assertThat("Wrong number of corrupt blocks",
@@ -559,7 +472,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_08(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_08() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -578,13 +492,13 @@ public class TestBlockReport {
       bc.start();
 
 
       waitForTempReplica(bl, DN_N1);
-      
+
       // all blocks belong to the same file, hence same BP
       DataNode dn = cluster.getDataNodes().get(DN_N1);
       String poolId = cluster.getNamesystem().getBlockPoolId();
       DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
       StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-      sendBlockReports(dnR, poolId, reports, splitBlockReports);
+      sendBlockReports(dnR, poolId, reports);
       printStats();
       assertEquals("Wrong number of PendingReplication blocks",
         blocks.size(), cluster.getNamesystem().getPendingReplicationBlocks());
@@ -600,7 +514,8 @@ public class TestBlockReport {
   // Similar to BlockReport_08 but corrupts GS and len of the TEMPORARY's
   // replica block. Expect the same behaviour: NN should simply ignore this
   // block
-  private void blockReport_09(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_09() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -620,17 +535,17 @@ public class TestBlockReport {
       bc.start();
 
 
       waitForTempReplica(bl, DN_N1);
-                                                
+
       // all blocks belong to the same file, hence same BP
       DataNode dn = cluster.getDataNodes().get(DN_N1);
       String poolId = cluster.getNamesystem().getBlockPoolId();
       DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
       StorageBlockReport[] reports = getBlockReports(dn, poolId, true, true);
-      sendBlockReports(dnR, poolId, reports, splitBlockReports);
+      sendBlockReports(dnR, poolId, reports);
       printStats();
       assertEquals("Wrong number of PendingReplication blocks",
         2, cluster.getNamesystem().getPendingReplicationBlocks());
-      
+
       try {
         bc.join();
       } catch (InterruptedException e) {}
@@ -638,7 +553,7 @@ public class TestBlockReport {
       resetConfiguration(); // return the initial state of the configuration
     }
   }
-  
+
   /**
    * Test for the case where one of the DNs in the pipeline is in the
    * process of doing a block report exactly when the block is closed.
@@ -648,7 +563,7 @@ public class TestBlockReport {
    * corrupt.
    * This is a regression test for HDFS-2791.
    */
-  @Test
+  @Test(timeout=300000)
   public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
     final CountDownLatch brFinished = new CountDownLatch(1);
     DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
@@ -663,7 +578,7 @@ public class TestBlockReport {
         }
       }
     };
-    
+
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
 
 
@@ -671,9 +586,9 @@ public class TestBlockReport {
     // what happens when one of the DNs is slowed for some reason.
     REPL_FACTOR = 2;
     startDNandWait(null, false);
-    
+
     NameNode nn = cluster.getNameNode();
-    
+
     FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
     try {
       AppendTestUtil.write(out, 0, 10);
@@ -684,19 +599,19 @@ public class TestBlockReport {
       DataNode dn = cluster.getDataNodes().get(0);
       DatanodeProtocolClientSideTranslatorPB spy =
         DataNodeTestUtils.spyOnBposToNN(dn, nn);
-      
+
       Mockito.doAnswer(delayer)
         .when(spy).blockReport(
           Mockito.<DatanodeRegistration>anyObject(),
           Mockito.anyString(),
           Mockito.<StorageBlockReport[]>anyObject());
-      
+
       // Force a block report to be generated. The block report will have
       // an RBW replica in it. Wait for the RPC to be sent, but block
       // it before it gets to the NN.
       dn.scheduleAllBlockReport(0);
       delayer.waitForCall();
-      
+
     } finally {
       IOUtils.closeStream(out);
     }
@@ -705,22 +620,22 @@ public class TestBlockReport {
     // state.
     delayer.proceed();
     brFinished.await();
-    
+
     // Verify that no replicas are marked corrupt, and that the
     // file is still readable.
     BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
     assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
     DFSTestUtil.readFile(fs, filePath);
-    
+
     // Ensure that the file is readable even from the DN that we futzed with.
     cluster.stopDataNode(1);
-    DFSTestUtil.readFile(fs, filePath);    
+    DFSTestUtil.readFile(fs, filePath);
   }
 
 
   private void waitForTempReplica(Block bl, int DN_N1) throws IOException {
     final boolean tooLongWait = false;
     final int TIMEOUT = 40000;
-    
+
     if(LOG.isDebugEnabled()) {
       LOG.debug("Wait for datanode " + DN_N1 + " to appear");
     }
@@ -731,7 +646,7 @@ public class TestBlockReport {
       LOG.debug("Total number of DNs " + cluster.getDataNodes().size());
     }
     cluster.waitActive();
-    
+
     // Look about specified DN for the replica of the block from 1st DN
     final DataNode dn1 = cluster.getDataNodes().get(DN_N1);
     String bpid = cluster.getNamesystem().getBlockPoolId();
@@ -789,7 +704,7 @@ public class TestBlockReport {
     return blocks;
   }
 
 
-  private void startDNandWait(Path filePath, boolean waitReplicas) 
+  private void startDNandWait(Path filePath, boolean waitReplicas)
       throws IOException, InterruptedException, TimeoutException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Before next DN start: " + cluster.getDataNodes().size());
@@ -802,7 +717,7 @@ public class TestBlockReport {
     if (LOG.isDebugEnabled()) {
       int lastDn = datanodes.size() - 1;
       LOG.debug("New datanode "
-          + cluster.getDataNodes().get(lastDn).getDisplayName() 
+          + cluster.getDataNodes().get(lastDn).getDisplayName()
           + " has been started");
     }
     if (waitReplicas) {
@@ -898,7 +813,7 @@ public class TestBlockReport {
     ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
     ((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL);
     ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL);
-    ((Log4JLogger) TestBlockReport.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger) BlockReportTestBase.LOG).getLogger().setLevel(Level.ALL);
   }
 
 
   private Block findBlock(Path path, long size) throws IOException {
@@ -918,11 +833,11 @@ public class TestBlockReport {
 
 
   private class BlockChecker extends Thread {
     Path filePath;
-    
+
     public BlockChecker(final Path filePath) {
       this.filePath = filePath;
     }
-    
+
     @Override
     public void run() {
       try {
@@ -933,13 +848,4 @@ public class TestBlockReport {
       }
     }
   }
-
-  private static void resetConfiguration() {
-    conf = new Configuration();
-    int customPerChecksumSize = 512;
-    int customBlockSize = customPerChecksumSize * 3;
-    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
-    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
-    conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DN_RESCAN_INTERVAL);
-  }
 }

+ 16 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java

@@ -18,12 +18,16 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 
 import java.io.DataOutputStream;
 import java.io.File;
 import java.net.InetSocketAddress;
 import java.net.Socket;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.nio.channels.ClosedChannelException;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -196,4 +200,16 @@ public class TestDiskError {
       }
     }
   }
+  
+  @Test
+  public void testNetworkErrorsIgnored() {
+    DataNode dn = cluster.getDataNodes().iterator().next();
+    
+    assertTrue(dn.isNetworkRelatedException(new SocketException()));
+    assertTrue(dn.isNetworkRelatedException(new SocketTimeoutException()));
+    assertTrue(dn.isNetworkRelatedException(new ClosedChannelException()));
+    assertTrue(dn.isNetworkRelatedException(new Exception("Broken pipe foo bar")));
+    assertFalse(dn.isNetworkRelatedException(new Exception()));
+    assertFalse(dn.isNetworkRelatedException(new Exception("random problem")));
+  }
 }

+ 205 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java

@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.*;
+import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
+import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import org.junit.After;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Matchers.*;
+import static org.mockito.Mockito.times;
+
+/**
+ * Tests that the DataNode respects
+ * {@link DFSConfigKeys#DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY}
+ */
+public class TestDnRespectsBlockReportSplitThreshold {
+  public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
+
+  private static final int BLOCK_SIZE = 1024;
+  private static final short REPL_FACTOR = 1;
+  private static final long seed = 0xFEEDFACE;
+  private static final int BLOCKS_IN_FILE = 5;
+
+  private static Configuration conf;
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem fs;
+  static String bpid;
+
+  public void startUpCluster(long splitThreshold) throws IOException {
+    conf = new HdfsConfiguration();
+    conf.setLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, splitThreshold);
+    cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(REPL_FACTOR)
+        .build();
+    fs = cluster.getFileSystem();
+    bpid = cluster.getNamesystem().getBlockPoolId();
+  }
+
+  @After
+  public void shutDownCluster() throws IOException {
+    if (cluster != null) {
+      fs.close();
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  private void createFile(String filenamePrefix, int blockCount)
+      throws IOException {
+    Path path = new Path("/" + filenamePrefix + ".dat");
+    DFSTestUtil.createFile(fs, path, BLOCK_SIZE,
+        blockCount * BLOCK_SIZE, BLOCK_SIZE, REPL_FACTOR, seed);
+  }
+
+  private void verifyCapturedArguments(
+      ArgumentCaptor<StorageBlockReport[]> captor,
+      int expectedReportsPerCall,
+      int expectedTotalBlockCount) {
+
+    List<StorageBlockReport[]> listOfReports = captor.getAllValues();
+    int numBlocksReported = 0;
+    for (StorageBlockReport[] reports : listOfReports) {
+      assertThat(reports.length, is(expectedReportsPerCall));
+
+      for (StorageBlockReport report : reports) {
+        BlockListAsLongs blockList = new BlockListAsLongs(report.getBlocks());
+        numBlocksReported += blockList.getNumberOfBlocks();
+      }
+    }
+
+    assert(numBlocksReported >= expectedTotalBlockCount);
+  }
+
+  /**
+   * Test that if splitThreshold is zero, then we always get a separate
+   * call per storage.
+   */
+  @Test(timeout=300000)
+  public void testAlwaysSplit() throws IOException, InterruptedException {
+    startUpCluster(0);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Create a file with a few blocks.
+    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a block report so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerBlockReport(dn);
+
+    ArgumentCaptor<StorageBlockReport[]> captor =
+        ArgumentCaptor.forClass(StorageBlockReport[].class);
+
+    Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport(
+        any(DatanodeRegistration.class),
+        anyString(),
+        captor.capture());
+
+    verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
+  }
+
+  /**
+   * Tests the behavior when the count of blocks is exactly one less than
+   * the threshold.
+   */
+  @Test(timeout=300000)
+  public void testCornerCaseUnderThreshold() throws IOException, InterruptedException {
+    startUpCluster(BLOCKS_IN_FILE + 1);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Create a file with a few blocks.
+    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a block report so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerBlockReport(dn);
+
+    ArgumentCaptor<StorageBlockReport[]> captor =
+        ArgumentCaptor.forClass(StorageBlockReport[].class);
+
+    Mockito.verify(nnSpy, times(1)).blockReport(
+        any(DatanodeRegistration.class),
+        anyString(),
+        captor.capture());
+
+    verifyCapturedArguments(captor, MiniDFSCluster.DIRS_PER_DATANODE, BLOCKS_IN_FILE);
+  }
+
+  /**
+   * Tests the behavior when the count of blocks is exactly equal to the
+   * threshold.
+   */
+  @Test(timeout=300000)
+  public void testCornerCaseAtThreshold() throws IOException, InterruptedException {
+    startUpCluster(BLOCKS_IN_FILE);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Create a file with a few blocks.
+    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a block report so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerBlockReport(dn);
+
+    ArgumentCaptor<StorageBlockReport[]> captor =
+        ArgumentCaptor.forClass(StorageBlockReport[].class);
+
+    Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport(
+        any(DatanodeRegistration.class),
+        anyString(),
+        captor.capture());
+
+    verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
+  }
+
+}

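Taken together, the three cases above document the threshold semantics: a threshold of 0 always produces one blockReport RPC per storage, a block count below the threshold goes out as a single combined call, and a count at or above it is split again. A small configuration sketch under those assumptions (the SplitThresholdSketch class is hypothetical; the key is the one the test sets):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;

import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;

public class SplitThresholdSketch {
  /** Build a conf that splits block reports once the DN holds at least this many blocks. */
  public static Configuration withSplitThreshold(long threshold) {
    Configuration conf = new HdfsConfiguration();
    // 0 means "always send one report per storage", as testAlwaysSplit verifies.
    conf.setLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, threshold);
    return conf;
  }
}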
+ 42 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesBlockReportPerStorage.java

@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+
+
+/**
+ * Runs all tests in BlockReportTestBase, sending one block per storage.
+ * This is the default DataNode behavior post HDFS-2832.
+ */
+public class TestNNHandlesBlockReportPerStorage extends BlockReportTestBase {
+
+  @Override
+  protected void sendBlockReports(DatanodeRegistration dnR, String poolId,
+      StorageBlockReport[] reports) throws IOException {
+    for (StorageBlockReport report : reports) {
+      LOG.info("Sending block report for storage " + report.getStorage().getStorageID());
+      StorageBlockReport[] singletonReport = { report };
+      cluster.getNameNodeRpc().blockReport(dnR, poolId, singletonReport);
+    }
+  }
+}

+ 39 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesCombinedBlockReport.java

@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+
+/**
+ * Runs all tests in BlockReportTestBase, sending one block report
+ * per DataNode. This tests that the NN can handle the legacy DN
+ * behavior where it presents itself as a single logical storage.
+ */
+public class TestNNHandlesCombinedBlockReport extends BlockReportTestBase {
+
+  @Override
+  protected void sendBlockReports(DatanodeRegistration dnR, String poolId,
+                                  StorageBlockReport[] reports) throws IOException {
+    LOG.info("Sending combined block reports for " + dnR);
+    cluster.getNameNodeRpc().blockReport(dnR, poolId, reports);
+  }
+}

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeHttpServer.java

@@ -85,6 +85,7 @@ public class TestNameNodeHttpServer {
   @Test
   public void testHttpPolicy() throws Exception {
     conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, policy.name());
+    conf.set(DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY, "localhost:0");
 
 
     InetSocketAddress addr = InetSocketAddress.createUnresolved("localhost", 0);
     NameNodeHttpServer server = null;
@@ -103,7 +104,9 @@ public class TestNameNodeHttpServer {
           server.getHttpsAddress() == null));
 
 
     } finally {
-      server.stop();
+      if (server != null) {
+        server.stop();
+      }
     }
   }
 
 

+ 105 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeOptionParsing.java

@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hdfs.protocol.LayoutVersion;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.junit.Test;
+
+public class TestNameNodeOptionParsing {
+
+  @Test(timeout = 10000)
+  public void testUpgrade() {
+    StartupOption opt = null;
+    // UPGRADE is set, but nothing else
+    opt = NameNode.parseArguments(new String[] {"-upgrade"});
+    assertEquals(opt, StartupOption.UPGRADE);
+    assertNull(opt.getClusterId());
+    assertTrue(FSImageFormat.renameReservedMap.isEmpty());
+    // cluster ID is set
+    opt = NameNode.parseArguments(new String[] { "-upgrade", "-clusterid",
+        "mycid" });
+    assertEquals(StartupOption.UPGRADE, opt);
+    assertEquals("mycid", opt.getClusterId());
+    assertTrue(FSImageFormat.renameReservedMap.isEmpty());
+    // Everything is set
+    opt = NameNode.parseArguments(new String[] { "-upgrade", "-clusterid",
+        "mycid", "-renameReserved",
+        ".snapshot=.my-snapshot,.reserved=.my-reserved" });
+    assertEquals(StartupOption.UPGRADE, opt);
+    assertEquals("mycid", opt.getClusterId());
+    assertEquals(".my-snapshot",
+        FSImageFormat.renameReservedMap.get(".snapshot"));
+    assertEquals(".my-reserved",
+        FSImageFormat.renameReservedMap.get(".reserved"));
+    // Reset the map
+    FSImageFormat.renameReservedMap.clear();
+    // Everything is set, but in a different order
+    opt = NameNode.parseArguments(new String[] { "-upgrade", "-renameReserved",
+        ".reserved=.my-reserved,.snapshot=.my-snapshot", "-clusterid",
+        "mycid"});
+    assertEquals(StartupOption.UPGRADE, opt);
+    assertEquals("mycid", opt.getClusterId());
+    assertEquals(".my-snapshot",
+        FSImageFormat.renameReservedMap.get(".snapshot"));
+    assertEquals(".my-reserved",
+        FSImageFormat.renameReservedMap.get(".reserved"));
+    // Try the default renameReserved
+    opt = NameNode.parseArguments(new String[] { "-upgrade", "-renameReserved"});
+    assertEquals(StartupOption.UPGRADE, opt);
+    assertEquals(
+        ".snapshot." + LayoutVersion.getCurrentLayoutVersion()
+            + ".UPGRADE_RENAMED",
+        FSImageFormat.renameReservedMap.get(".snapshot"));
+    assertEquals(
+        ".reserved." + LayoutVersion.getCurrentLayoutVersion()
+            + ".UPGRADE_RENAMED",
+        FSImageFormat.renameReservedMap.get(".reserved"));
+
+    // Try some error conditions
+    try {
+      opt =
+          NameNode.parseArguments(new String[] { "-upgrade", "-renameReserved",
+              ".reserved=.my-reserved,.not-reserved=.my-not-reserved" });
+    } catch (IllegalArgumentException e) {
+      assertExceptionContains("Unknown reserved path", e);
+    }
+    try {
+      opt =
+          NameNode.parseArguments(new String[] { "-upgrade", "-renameReserved",
+              ".reserved=.my-reserved,.snapshot=.snapshot" });
+    } catch (IllegalArgumentException e) {
+      assertExceptionContains("Invalid rename path", e);
+    }
+    try {
+      opt =
+          NameNode.parseArguments(new String[] { "-upgrade", "-renameReserved",
+              ".snapshot=.reserved" });
+    } catch (IllegalArgumentException e) {
+      assertExceptionContains("Invalid rename path", e);
+    }
+    opt = NameNode.parseArguments(new String[] { "-upgrade", "-cid"});
+    assertNull(opt);
+  }
+
+}

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

@@ -55,6 +55,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSImage;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.io.IOUtils;
@@ -65,6 +66,7 @@ import org.apache.log4j.Level;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.internal.util.reflection.Whitebox;
 
 
 import com.google.common.base.Supplier;
 import com.google.common.collect.Lists;
@@ -124,6 +126,9 @@ public class TestHASafeMode {
     final Path test = new Path("/test");
     // let nn0 enter safemode
     NameNodeAdapter.enterSafeMode(nn0, false);
+    SafeModeInfo safeMode = (SafeModeInfo) Whitebox.getInternalState(
+        nn0.getNamesystem(), "safeMode");
+    Whitebox.setInternalState(safeMode, "extension", Integer.valueOf(30000));
     LOG.info("enter safemode");
     LOG.info("enter safemode");
     new Thread() {
     new Thread() {
       @Override
       @Override

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestHttpsFileSystem.java

@@ -52,6 +52,7 @@ public class TestHttpsFileSystem {
     conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
     conf.set(DFSConfigKeys.DFS_HTTP_POLICY_KEY, HttpConfig.Policy.HTTPS_ONLY.name());
     conf.set(DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY, "localhost:0");
+    conf.set(DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY, "localhost:0");
 
     File base = new File(BASEDIR);
     FileUtil.fullyDelete(base);

Binary
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-2-reserved.tgz


+ 9 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -153,6 +153,8 @@ Release 2.4.0 - UNRELEASED
     MAPREDUCE-5732. Report proper queue when job has been automatically placed
     (Sandy Ryza)
 
+    MAPREDUCE-5699. Allow setting tags on MR jobs (kasha)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -228,6 +230,10 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-5725. Make explicit that TestNetworkedJob relies on the Capacity
     Scheduler (Sandy Ryza)
 
+    MAPREDUCE-5744. Job hangs because
+    RMContainerAllocator$AssignedRequests.preemptReduce() violates the
+    comparator contract (Gera Shegalov via kasha)
+
   OPTIMIZATIONS
 
     MAPREDUCE-4680. Job history cleaner should only check timestamps of files in
@@ -347,6 +353,9 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-5723. MR AM container log can be truncated or empty.
     (Mohammad Kamrul Islam via kasha)
 
+    MAPREDUCE-5743. Fixed the test failure in TestRMContainerAllocator.
+    (Ted Yu and Vinod Kumar Vavilapalli via zjshen)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -1143,9 +1143,9 @@ public class RMContainerAllocator extends RMContainerRequestor
           new Comparator<TaskAttemptId>() {
         @Override
         public int compare(TaskAttemptId o1, TaskAttemptId o2) {
-          float p = getJob().getTask(o1.getTaskId()).getAttempt(o1).getProgress() -
-              getJob().getTask(o2.getTaskId()).getAttempt(o2).getProgress();
-          return p >= 0 ? 1 : -1;
+          return Float.compare(
+              getJob().getTask(o1.getTaskId()).getAttempt(o1).getProgress(),
+              getJob().getTask(o2.getTaskId()).getAttempt(o2).getProgress());
         }
       });
      

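The Float.compare change above matters because the old expression could never return 0 and returned 1 for both orderings whenever two attempts had identical progress, breaking the sign symmetry that java.util.Comparator requires; that is the MAPREDUCE-5744 hang recorded in CHANGES.txt. A minimal, self-contained sketch of the difference; the float array standing in for task-attempt progress is hypothetical:

    import java.util.Arrays;
    import java.util.Comparator;

    public class ComparatorContractDemo {
      public static void main(String[] args) {
        final float[] progress = {0.5f, 0.5f, 0.2f};  // hypothetical attempt progress values

        // Old style: never returns 0, and returns 1 for both orderings of equal elements.
        Comparator<Integer> broken = new Comparator<Integer>() {
          @Override
          public int compare(Integer o1, Integer o2) {
            float p = progress[o1] - progress[o2];
            return p >= 0 ? 1 : -1;                   // violates compare(a,b) == -compare(b,a)
          }
        };
        System.out.println(broken.compare(0, 1) + " " + broken.compare(1, 0)); // prints "1 1"

        // Fixed style, mirroring the patch: delegate to Float.compare.
        Comparator<Integer> fixed = new Comparator<Integer>() {
          @Override
          public int compare(Integer o1, Integer o2) {
            return Float.compare(progress[o1], progress[o2]);
          }
        };
        Integer[] ids = {0, 1, 2};
        Arrays.sort(ids, fixed);                      // safe: [2, 0, 1], ascending progress
        System.out.println(Arrays.toString(ids));
      }
    }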
+ 8 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java

@@ -1652,8 +1652,16 @@ public class TestRMContainerAllocator {
     RMApp app = rm.submitApp(1024);
     dispatcher.await();
 
+    // Make a node to register so as to launch the AM.
+    MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
+    amNodeManager.nodeHeartbeat(true);
+    dispatcher.await();
+
     ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt()
         .getAppAttemptId();
+    rm.sendAMLaunched(appAttemptId);
+    dispatcher.await();
+
     JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0);
     Job job = mock(Job.class);
     when(job.getReport()).thenReturn(

+ 2 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -60,6 +60,8 @@ public interface MRJobConfig {
 
   public static final String QUEUE_NAME = "mapreduce.job.queuename";
 
+  public static final String JOB_TAGS = "mapreduce.job.tags";
+
   public static final String JVM_NUMTASKS_TORUN = "mapreduce.job.jvm.numtasks";
 
   public static final String SPLIT_FILE = "mapreduce.job.splitfile";

+ 9 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

@@ -1,4 +1,5 @@
 <?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
@@ -15,7 +16,6 @@
    See the License for the specific language governing permissions and
    limitations under the License.
 -->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <!-- Do not modify this file directly.  Instead, copy entries that you -->
 <!-- wish to modify from this file into mapred-site.xml and change them -->
@@ -727,6 +727,14 @@
   </description>
 </property>
 
+  <property>
+    <name>mapreduce.job.tags</name>
+    <value></value>
+    <description> Tags for the job that will be passed to YARN at submission
+      time. Queries to YARN for applications can filter on these tags.
+    </description>
+  </property>
+
 <property>
   <name>mapreduce.cluster.local.dir</name>
   <value>${hadoop.tmp.dir}/mapred/local</value>

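mapreduce.job.tags is read as a trimmed, comma-separated list and handed to YARN at submission time (see the YARNRunner hunk below). A small, hedged sketch of a driver setting it; the tag values and job name are made up:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.MRJobConfig;

    public class TaggedJobExample {
      public static Job buildTaggedJob() throws Exception {
        Configuration conf = new Configuration();
        // Comma-separated tags; whitespace around entries is trimmed by
        // Configuration.getTrimmedStringCollection() in YARNRunner.
        conf.set(MRJobConfig.JOB_TAGS, "nightly, billing-etl");
        Job job = Job.getInstance(conf, "tagged-job");  // set mapper/reducer/paths as usual
        return job;
      }
    }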
+ 7 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java

@@ -21,7 +21,9 @@ package org.apache.hadoop.mapred;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Vector;
@@ -467,6 +469,8 @@ public class YARNRunner implements ClientProtocol {
         ContainerLaunchContext.newInstance(localResources, environment,
           vargsFinal, null, securityTokens, acls);
 
+    Collection<String> tagsFromConf =
+        jobConf.getTrimmedStringCollection(MRJobConfig.JOB_TAGS);
 
     // Set up the ApplicationSubmissionContext
     ApplicationSubmissionContext appContext =
@@ -486,6 +490,9 @@ public class YARNRunner implements ClientProtocol {
             MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS));
     appContext.setResource(capability);
     appContext.setApplicationType(MRJobConfig.MR_APPLICATION_TYPE);
+    if (tagsFromConf != null && !tagsFromConf.isEmpty()) {
+      appContext.setApplicationTags(new HashSet<String>(tagsFromConf));
+    }
     return appContext;
   }
 

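The same two calls work for any client that builds its own ApplicationSubmissionContext, not just the MR submitter. A minimal sketch under that assumption; the helper class and the tag set passed in are hypothetical:

    import java.util.HashSet;
    import java.util.Set;
    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;

    public final class TagSubmission {
      private TagSubmission() {}

      /** Attach tags to an already-built submission context, mirroring YARNRunner. */
      public static void tag(ApplicationSubmissionContext appContext, Set<String> tags) {
        if (tags != null && !tags.isEmpty()) {
          appContext.setApplicationTags(new HashSet<String>(tags));
        }
      }
    }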
+ 8 - 7
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java

@@ -37,15 +37,16 @@ public enum DistCpOptionSwitch {
   /**
    * Preserves status of file/path in the target.
    * Default behavior with -p, is to preserve replication,
-   * block size, user, group and permission on the target file
+   * block size, user, group, permission and checksum type on the target file.
+   * Note that when preserving checksum type, block size is also preserved.
    *
-   * If any of the optional switches are present among rbugp, then
-   * only the corresponding file attribute is preserved
+   * If any of the optional switches are present among rbugpc, then
+   * only the corresponding file attribute is preserved.
    *
    */
   PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
-      new Option("p", true, "preserve status (rbugp)" +
-          "(replication, block-size, user, group, permission)")),
+      new Option("p", true, "preserve status (rbugpc)" +
+          "(replication, block-size, user, group, permission, checksum-type)")),
 
   /**
    * Update target location by copying only files that are missing
@@ -53,7 +54,7 @@
    * across source and target. Typically used with DELETE_MISSING
    * Incompatible with ATOMIC_COMMIT
    */
-  SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, 
+  SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS,
       new Option("update", false, "Update target, copying only missing" +
           "files or directories")),
 
@@ -80,7 +81,7 @@
    * Max number of maps to use during copy. DistCp will split work
    * as equally as possible among these maps
    */
-  MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS, 
+  MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS,
       new Option("m", true, "Max number of concurrent maps to use for copy")),
 
   /**

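After this change a bare -p preserves all of replication, block size, user, group, permission and checksum type, and a lone c selects just the checksum type. A short sketch against the parser, with placeholder paths, mirroring the TestOptionsParser cases further down:

    import org.apache.hadoop.tools.DistCpOptions;
    import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
    import org.apache.hadoop.tools.OptionsParser;

    public class PreserveChecksumTypeExample {
      public static void main(String[] args) {
        DistCpOptions options = OptionsParser.parse(new String[] {
            "-pc",                             // preserve only the checksum type
            "hdfs://nn:8020/source/first",     // placeholder source
            "hdfs://nn:8020/target/"});        // placeholder target
        System.out.println(options.shouldPreserve(FileAttribute.CHECKSUMTYPE)); // true
        System.out.println(options.shouldPreserve(FileAttribute.BLOCKSIZE));    // false
      }
    }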
+ 1 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java

@@ -61,7 +61,7 @@ public class DistCpOptions {
   private Path targetPath;
 
   public static enum FileAttribute{
-    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION;
+    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE;
 
     public static FileAttribute getAttribute(char symbol) {
       for (FileAttribute attribute : values()) {

+ 3 - 3
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java

@@ -34,7 +34,7 @@ public class OptionsParser {
 
   private static final Log LOG = LogFactory.getLog(OptionsParser.class);
 
-  private static final Options cliOptions = new Options();      
+  private static final Options cliOptions = new Options();
 
   static {
     for (DistCpOptionSwitch option : DistCpOptionSwitch.values()) {
@@ -50,7 +50,7 @@
     protected String[] flatten(Options options, String[] arguments, boolean stopAtNonOption) {
       for (int index = 0; index < arguments.length; index++) {
         if (arguments[index].equals("-" + DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
-          arguments[index] = "-prbugp";
+          arguments[index] = "-prbugpc";
         }
       }
       return super.flatten(options, arguments, stopAtNonOption);
@@ -125,7 +125,7 @@
        option.setAtomicWorkPath(new Path(workPath));
      }
    } else if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch())) {
-      throw new IllegalArgumentException("-tmp work-path can only be specified along with -atomic");      
+      throw new IllegalArgumentException("-tmp work-path can only be specified along with -atomic");
    }
 
     if (command.hasOption(DistCpOptionSwitch.LOG_PATH.getSwitch())) {

+ 3 - 3
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java

@@ -111,7 +111,7 @@ public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
    */
   private void initializeSSLConf(Context context) throws IOException {
     LOG.info("Initializing SSL configuration");
-    
+
     String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
     Path[] cacheFiles = context.getLocalCacheFiles();
 
@@ -294,7 +294,7 @@ public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
            RetriableFileCopyCommand.CopyReadException) {
      incrementCounter(context, Counter.FAIL, 1);
      incrementCounter(context, Counter.BYTESFAILED, sourceFileStatus.getLen());
-      context.write(null, new Text("FAIL: " + sourceFileStatus.getPath() + " - " + 
+      context.write(null, new Text("FAIL: " + sourceFileStatus.getPath() + " - " +
          StringUtils.stringifyException(exception)));
    }
    else
@@ -322,7 +322,7 @@ public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
                   targetFileStatus.getLen() != source.getLen()
                || (!skipCrc &&
                       !DistCpUtils.checksumsAreEqual(sourceFS,
-                                          source.getPath(), targetFS, target))
+                          source.getPath(), null, targetFS, target))
                || (source.getBlockSize() != targetFileStatus.getBlockSize() &&
                      preserve.contains(FileAttribute.BLOCKSIZE))
               );

+ 75 - 32
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -18,23 +18,33 @@
 
 
 package org.apache.hadoop.tools.mapred;
 package org.apache.hadoop.tools.mapred;
 
 
-import org.apache.hadoop.tools.util.RetriableCommand;
-import org.apache.hadoop.tools.util.ThrottledInputStream;
-import org.apache.hadoop.tools.util.DistCpUtils;
-import org.apache.hadoop.tools.DistCpOptions.*;
-import org.apache.hadoop.tools.DistCpConstants;
-import org.apache.hadoop.fs.*;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.IOUtils;
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.EnumSet;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.tools.util.ThrottledInputStream;
 
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.annotations.VisibleForTesting;
 
 
-import java.io.*;
-import java.util.EnumSet;
-
 /**
 /**
  * This class extends RetriableCommand to implement the copy of files,
  * This class extends RetriableCommand to implement the copy of files,
  * with retries on failure.
  * with retries on failure.
@@ -44,7 +54,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class);
   private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class);
   private static int BUFFER_SIZE = 8 * 1024;
   private static int BUFFER_SIZE = 8 * 1024;
   private boolean skipCrc = false;
   private boolean skipCrc = false;
-  
+
   /**
   /**
    * Constructor, taking a description of the action.
    * Constructor, taking a description of the action.
    * @param description Verbose description of the copy operation.
    * @param description Verbose description of the copy operation.
@@ -52,7 +62,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   public RetriableFileCopyCommand(String description) {
   public RetriableFileCopyCommand(String description) {
     super(description);
     super(description);
   }
   }
- 
+
   /**
   /**
    * Create a RetriableFileCopyCommand.
    * Create a RetriableFileCopyCommand.
    *
    *
@@ -99,15 +109,21 @@ public class RetriableFileCopyCommand extends RetriableCommand {
         LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
         LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
         LOG.debug("Tmp-file path: " + tmpTargetPath);
         LOG.debug("Tmp-file path: " + tmpTargetPath);
       }
       }
-      FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(
-              configuration);
+      final Path sourcePath = sourceFileStatus.getPath();
+      final FileSystem sourceFS = sourcePath.getFileSystem(configuration);
+      final FileChecksum sourceChecksum = fileAttributes
+          .contains(FileAttribute.CHECKSUMTYPE) ? sourceFS
+          .getFileChecksum(sourcePath) : null;
+
       long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus,
       long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus,
-                                     context, fileAttributes);
+          context, fileAttributes, sourceChecksum);
 
 
-      compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
+      compareFileLengths(sourceFileStatus, tmpTargetPath, configuration,
+          bytesRead);
       //At this point, src&dest lengths are same. if length==0, we skip checksum
       //At this point, src&dest lengths are same. if length==0, we skip checksum
       if ((bytesRead != 0) && (!skipCrc)) {
       if ((bytesRead != 0) && (!skipCrc)) {
-        compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
+        compareCheckSums(sourceFS, sourceFileStatus.getPath(), sourceChecksum,
+            targetFS, tmpTargetPath);
       }
       }
       promoteTmpToTarget(tmpTargetPath, target, targetFS);
       promoteTmpToTarget(tmpTargetPath, target, targetFS);
       return bytesRead;
       return bytesRead;
@@ -118,14 +134,33 @@ public class RetriableFileCopyCommand extends RetriableCommand {
     }
     }
   }
   }
 
 
+  /**
+   * @return the checksum spec of the source checksum if checksum type should be
+   *         preserved
+   */
+  private ChecksumOpt getChecksumOpt(EnumSet<FileAttribute> fileAttributes,
+      FileChecksum sourceChecksum) {
+    if (fileAttributes.contains(FileAttribute.CHECKSUMTYPE)
+        && sourceChecksum != null) {
+      return sourceChecksum.getChecksumOpt();
+    }
+    return null;
+  }
+
   private long copyToTmpFile(Path tmpTargetPath, FileSystem targetFS,
   private long copyToTmpFile(Path tmpTargetPath, FileSystem targetFS,
-                             FileStatus sourceFileStatus, Mapper.Context context,
-                             EnumSet<FileAttribute> fileAttributes)
-                             throws IOException {
-    OutputStream outStream = new BufferedOutputStream(targetFS.create(
-            tmpTargetPath, true, BUFFER_SIZE,
-            getReplicationFactor(fileAttributes, sourceFileStatus, targetFS, tmpTargetPath),
-            getBlockSize(fileAttributes, sourceFileStatus, targetFS, tmpTargetPath), context));
+      FileStatus sourceFileStatus, Mapper.Context context,
+      EnumSet<FileAttribute> fileAttributes, final FileChecksum sourceChecksum)
+      throws IOException {
+    FsPermission permission = FsPermission.getFileDefault().applyUMask(
+        FsPermission.getUMask(targetFS.getConf()));
+    OutputStream outStream = new BufferedOutputStream(
+        targetFS.create(tmpTargetPath, permission,
+            EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), BUFFER_SIZE,
+            getReplicationFactor(fileAttributes, sourceFileStatus, targetFS,
+                tmpTargetPath),
+            getBlockSize(fileAttributes, sourceFileStatus, targetFS,
+                tmpTargetPath),
+            context, getChecksumOpt(fileAttributes, sourceChecksum)));
     return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, context);
     return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, context);
   }
   }
 
 
@@ -140,9 +175,10 @@ public class RetriableFileCopyCommand extends RetriableCommand {
   }
   }
 
 
   private void compareCheckSums(FileSystem sourceFS, Path source,
   private void compareCheckSums(FileSystem sourceFS, Path source,
-                                FileSystem targetFS, Path target)
-                                throws IOException {
-    if (!DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target)) {
+      FileChecksum sourceChecksum, FileSystem targetFS, Path target)
+      throws IOException {
+    if (!DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
+        targetFS, target)) {
       StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ")
       StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ")
           .append(source).append(" and ").append(target).append(".");
           .append(source).append(" and ").append(target).append(".");
       if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) {
       if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) {
@@ -249,11 +285,18 @@ public class RetriableFileCopyCommand extends RetriableCommand {
             sourceFile.getReplication() : targetFS.getDefaultReplication(tmpTargetPath);
             sourceFile.getReplication() : targetFS.getDefaultReplication(tmpTargetPath);
   }
   }
 
 
+  /**
+   * @return the block size of the source file if we need to preserve either
+   *         the block size or the checksum type. Otherwise the default block
+   *         size of the target FS.
+   */
   private static long getBlockSize(
   private static long getBlockSize(
           EnumSet<FileAttribute> fileAttributes,
           EnumSet<FileAttribute> fileAttributes,
           FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
           FileStatus sourceFile, FileSystem targetFS, Path tmpTargetPath) {
-    return fileAttributes.contains(FileAttribute.BLOCKSIZE)?
-            sourceFile.getBlockSize() : targetFS.getDefaultBlockSize(tmpTargetPath);
+    boolean preserve = fileAttributes.contains(FileAttribute.BLOCKSIZE)
+        || fileAttributes.contains(FileAttribute.CHECKSUMTYPE);
+    return preserve ? sourceFile.getBlockSize() : targetFS
+        .getDefaultBlockSize(tmpTargetPath);
   }
   }
 
 
   /**
   /**
@@ -261,7 +304,7 @@ public class RetriableFileCopyCommand extends RetriableCommand {
    * failures from other kinds of IOExceptions.
    * failures from other kinds of IOExceptions.
    * The failure to read from source is dealt with specially, in the CopyMapper.
    * The failure to read from source is dealt with specially, in the CopyMapper.
    * Such failures may be skipped if the DistCpOptions indicate so.
    * Such failures may be skipped if the DistCpOptions indicate so.
-   * Write failures are intolerable, and amount to CopyMapper failure.  
+   * Write failures are intolerable, and amount to CopyMapper failure.
    */
    */
   public static class CopyReadException extends IOException {
   public static class CopyReadException extends IOException {
     public CopyReadException(Throwable rootCause) {
     public CopyReadException(Throwable rootCause) {

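The copy path above relies on the FileSystem.create overload that takes an Options.ChecksumOpt, so the temporary target file is written with the source's checksum spec; DistCp also keeps the source block size in that case so the two file checksums remain comparable. A hedged, standalone sketch of that call shape; the buffer size and the helper method itself are placeholders:

    import java.io.OutputStream;
    import java.util.EnumSet;

    import org.apache.hadoop.fs.CreateFlag;
    import org.apache.hadoop.fs.FileChecksum;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Options.ChecksumOpt;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;

    public class ChecksumPreservingCreate {
      public static OutputStream openTarget(FileSystem sourceFS, Path source,
          FileSystem targetFS, Path target) throws Exception {
        FileChecksum sourceChecksum = sourceFS.getFileChecksum(source);
        ChecksumOpt checksumOpt =
            sourceChecksum != null ? sourceChecksum.getChecksumOpt() : null;
        FsPermission permission = FsPermission.getFileDefault().applyUMask(
            FsPermission.getUMask(targetFS.getConf()));
        long blockSize = sourceFS.getFileStatus(source).getBlockSize(); // keep blocks aligned
        return targetFS.create(target, permission,
            EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
            8 * 1024,                                  // buffer size placeholder
            targetFS.getDefaultReplication(target),
            blockSize,
            null /* progress */, checksumOpt);
      }
    }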
+ 8 - 6
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java

@@ -125,7 +125,7 @@ public class DistCpUtils {
    * @param sourceRootPath - Source root path
    * @param childPath - Path for which relative path is required
    * @return - Relative portion of the child path (always prefixed with /
-   *           unless it is empty 
+   *           unless it is empty
    */
   public static String getRelativePath(Path sourceRootPath, Path childPath) {
     String childPathString = childPath.toUri().getPath();
@@ -277,9 +277,11 @@
    * If checksums's can't be retrieved, it doesn't fail the test
    * Only time the comparison would fail is when checksums are
    * available and they don't match
-   *                                  
+   *
    * @param sourceFS FileSystem for the source path.
    * @param source The source path.
+   * @param sourceChecksum The checksum of the source file. If it is null we
+   * still need to retrieve it through sourceFS.
    * @param targetFS FileSystem for the target path.
    * @param target The target path.
    * @return If either checksum couldn't be retrieved, the function returns
@@ -288,12 +290,12 @@
    * @throws IOException if there's an exception while retrieving checksums.
    */
   public static boolean checksumsAreEqual(FileSystem sourceFS, Path source,
-                                   FileSystem targetFS, Path target)
-                                   throws IOException {
-    FileChecksum sourceChecksum = null;
+      FileChecksum sourceChecksum, FileSystem targetFS, Path target)
+      throws IOException {
     FileChecksum targetChecksum = null;
     try {
-      sourceChecksum = sourceFS.getFileChecksum(source);
+      sourceChecksum = sourceChecksum != null ? sourceChecksum : sourceFS
+          .getFileChecksum(source);
       targetChecksum = targetFS.getFileChecksum(target);
     } catch (IOException e) {
       LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);

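Callers that already hold the source checksum, as RetriableFileCopyCommand now does, can pass it straight in and skip a second getFileChecksum round trip; passing null keeps the old behaviour. A brief sketch with placeholder arguments:

    import org.apache.hadoop.fs.FileChecksum;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.util.DistCpUtils;

    public class ChecksumCompareExample {
      public static boolean sameChecksum(FileSystem sourceFS, Path source,
          FileSystem targetFS, Path target) throws Exception {
        FileChecksum sourceChecksum = sourceFS.getFileChecksum(source); // fetched once, reused
        return DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
            targetFS, target);
      }
    }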
+ 32 - 3
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java

@@ -110,7 +110,7 @@ public class TestOptionsParser {
         "hdfs://localhost:8020/target/"});
         "hdfs://localhost:8020/target/"});
     Assert.assertEquals(options.getMapBandwidth(), 11);
     Assert.assertEquals(options.getMapBandwidth(), 11);
   }
   }
-  
+
   @Test(expected=IllegalArgumentException.class)
   @Test(expected=IllegalArgumentException.class)
   public void testParseNonPositiveBandwidth() {
   public void testParseNonPositiveBandwidth() {
     OptionsParser.parse(new String[] {
     OptionsParser.parse(new String[] {
@@ -119,7 +119,7 @@ public class TestOptionsParser {
         "hdfs://localhost:8020/source/first",
         "hdfs://localhost:8020/source/first",
         "hdfs://localhost:8020/target/"});
         "hdfs://localhost:8020/target/"});
   }
   }
-  
+
   @Test(expected=IllegalArgumentException.class)
   @Test(expected=IllegalArgumentException.class)
   public void testParseZeroBandwidth() {
   public void testParseZeroBandwidth() {
     OptionsParser.parse(new String[] {
     OptionsParser.parse(new String[] {
@@ -397,6 +397,7 @@ public class TestOptionsParser {
     Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
 
 
     options = OptionsParser.parse(new String[] {
     options = OptionsParser.parse(new String[] {
         "-p",
         "-p",
@@ -408,6 +409,7 @@ public class TestOptionsParser {
     Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
 
 
     options = OptionsParser.parse(new String[] {
     options = OptionsParser.parse(new String[] {
         "-p",
         "-p",
@@ -418,6 +420,7 @@ public class TestOptionsParser {
     Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
 
 
     options = OptionsParser.parse(new String[] {
     options = OptionsParser.parse(new String[] {
         "-pbr",
         "-pbr",
@@ -429,6 +432,7 @@ public class TestOptionsParser {
     Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
     Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
 
 
     options = OptionsParser.parse(new String[] {
     options = OptionsParser.parse(new String[] {
         "-pbrgup",
         "-pbrgup",
@@ -440,6 +444,31 @@ public class TestOptionsParser {
     Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
     Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
+
+    options = OptionsParser.parse(new String[] {
+        "-pbrgupc",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
+
+    options = OptionsParser.parse(new String[] {
+        "-pc",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.CHECKSUMTYPE));
 
 
     options = OptionsParser.parse(new String[] {
     options = OptionsParser.parse(new String[] {
         "-p",
         "-p",
@@ -452,7 +481,7 @@ public class TestOptionsParser {
       attribIterator.next();
       attribIterator.next();
       i++;
       i++;
     }
     }
-    Assert.assertEquals(i, 5);
+    Assert.assertEquals(i, 6);
 
 
     try {
     try {
       OptionsParser.parse(new String[] {
       OptionsParser.parse(new String[] {

+ 92 - 30
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java

@@ -18,18 +18,28 @@
 
 
 package org.apache.hadoop.tools.mapred;
 package org.apache.hadoop.tools.mapred;
 
 
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.tools.DistCpConstants;
 import org.apache.hadoop.tools.DistCpConstants;
@@ -37,23 +47,17 @@ import org.apache.hadoop.tools.DistCpOptionSwitch;
 import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.StubContext;
 import org.apache.hadoop.tools.StubContext;
 import org.apache.hadoop.tools.util.DistCpUtils;
 import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.DataChecksum;
 import org.junit.Assert;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.Test;
 
 
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.security.PrivilegedAction;
-import java.util.ArrayList;
-import java.util.EnumSet;
-import java.util.List;
-
 public class TestCopyMapper {
 public class TestCopyMapper {
   private static final Log LOG = LogFactory.getLog(TestCopyMapper.class);
   private static final Log LOG = LogFactory.getLog(TestCopyMapper.class);
   private static List<Path> pathList = new ArrayList<Path>();
   private static List<Path> pathList = new ArrayList<Path>();
   private static int nFiles = 0;
   private static int nFiles = 0;
   private static final int DEFAULT_FILE_SIZE = 1024;
   private static final int DEFAULT_FILE_SIZE = 1024;
+  private static final long NON_DEFAULT_BLOCK_SIZE = 4096;
 
 
   private static MiniDFSCluster cluster;
   private static MiniDFSCluster cluster;
 
 
@@ -119,12 +123,27 @@ public class TestCopyMapper {
     mkdirs(SOURCE_PATH + "/2/3/4");
     mkdirs(SOURCE_PATH + "/2/3/4");
     mkdirs(SOURCE_PATH + "/2/3");
     mkdirs(SOURCE_PATH + "/2/3");
     mkdirs(SOURCE_PATH + "/5");
     mkdirs(SOURCE_PATH + "/5");
-    touchFile(SOURCE_PATH + "/5/6", true);
+    touchFile(SOURCE_PATH + "/5/6", true, null);
     mkdirs(SOURCE_PATH + "/7");
     mkdirs(SOURCE_PATH + "/7");
     mkdirs(SOURCE_PATH + "/7/8");
     mkdirs(SOURCE_PATH + "/7/8");
     touchFile(SOURCE_PATH + "/7/8/9");
     touchFile(SOURCE_PATH + "/7/8/9");
   }
   }
 
 
+  private static void createSourceDataWithDifferentChecksumType()
+      throws Exception {
+    mkdirs(SOURCE_PATH + "/1");
+    mkdirs(SOURCE_PATH + "/2");
+    mkdirs(SOURCE_PATH + "/2/3/4");
+    mkdirs(SOURCE_PATH + "/2/3");
+    mkdirs(SOURCE_PATH + "/5");
+    touchFile(SOURCE_PATH + "/5/6", new ChecksumOpt(DataChecksum.Type.CRC32,
+        512));
+    mkdirs(SOURCE_PATH + "/7");
+    mkdirs(SOURCE_PATH + "/7/8");
+    touchFile(SOURCE_PATH + "/7/8/9", new ChecksumOpt(DataChecksum.Type.CRC32C,
+        512));
+  }
+
   private static void mkdirs(String path) throws Exception {
   private static void mkdirs(String path) throws Exception {
     FileSystem fileSystem = cluster.getFileSystem();
     FileSystem fileSystem = cluster.getFileSystem();
     final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
     final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
@@ -134,21 +153,31 @@ public class TestCopyMapper {
   }
   }
 
 
   private static void touchFile(String path) throws Exception {
   private static void touchFile(String path) throws Exception {
-    touchFile(path, false);
+    touchFile(path, false, null);
   }
   }
 
 
-  private static void touchFile(String path, boolean createMultipleBlocks) throws Exception {
-    final long NON_DEFAULT_BLOCK_SIZE = 4096;
+  private static void touchFile(String path, ChecksumOpt checksumOpt)
+      throws Exception {
+    // create files with specific checksum opt and non-default block size
+    touchFile(path, true, checksumOpt);
+  }
+
+  private static void touchFile(String path, boolean createMultipleBlocks,
+      ChecksumOpt checksumOpt) throws Exception {
     FileSystem fs;
     FileSystem fs;
     DataOutputStream outputStream = null;
     DataOutputStream outputStream = null;
     try {
     try {
       fs = cluster.getFileSystem();
       fs = cluster.getFileSystem();
       final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
       final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
-                                                      fs.getWorkingDirectory());
-      final long blockSize = createMultipleBlocks? NON_DEFAULT_BLOCK_SIZE : fs.getDefaultBlockSize(qualifiedPath) * 2;
-      outputStream = fs.create(qualifiedPath, true, 0,
-              (short)(fs.getDefaultReplication(qualifiedPath)*2),
-              blockSize);
+          fs.getWorkingDirectory());
+      final long blockSize = createMultipleBlocks ? NON_DEFAULT_BLOCK_SIZE : fs
+          .getDefaultBlockSize(qualifiedPath) * 2;
+      FsPermission permission = FsPermission.getFileDefault().applyUMask(
+          FsPermission.getUMask(fs.getConf()));
+      outputStream = fs.create(qualifiedPath, permission,
+          EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 0,
+          (short) (fs.getDefaultReplication(qualifiedPath) * 2), blockSize,
+          null, checksumOpt);
       byte[] bytes = new byte[DEFAULT_FILE_SIZE];
       byte[] bytes = new byte[DEFAULT_FILE_SIZE];
       outputStream.write(bytes);
       outputStream.write(bytes);
       long fileSize = DEFAULT_FILE_SIZE;
       long fileSize = DEFAULT_FILE_SIZE;
@@ -171,17 +200,40 @@ public class TestCopyMapper {
     }
     }
   }
   }
 
 
+  @Test
+  public void testCopyWithDifferentChecksumType() throws Exception {
+    testCopy(true);
+  }
+
   @Test(timeout=40000)
   @Test(timeout=40000)
   public void testRun() {
   public void testRun() {
+    testCopy(false);
+  }
+
+  private void testCopy(boolean preserveChecksum) {
     try {
     try {
       deleteState();
       deleteState();
-      createSourceData();
+      if (preserveChecksum) {
+        createSourceDataWithDifferentChecksumType();
+      } else {
+        createSourceData();
+      }
 
 
       FileSystem fs = cluster.getFileSystem();
       FileSystem fs = cluster.getFileSystem();
       CopyMapper copyMapper = new CopyMapper();
       CopyMapper copyMapper = new CopyMapper();
       StubContext stubContext = new StubContext(getConfiguration(), null, 0);
       StubContext stubContext = new StubContext(getConfiguration(), null, 0);
       Mapper<Text, FileStatus, Text, Text>.Context context
       Mapper<Text, FileStatus, Text, Text>.Context context
               = stubContext.getContext();
               = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      EnumSet<DistCpOptions.FileAttribute> fileAttributes
+              = EnumSet.of(DistCpOptions.FileAttribute.REPLICATION);
+      if (preserveChecksum) {
+        fileAttributes.add(DistCpOptions.FileAttribute.CHECKSUMTYPE);
+      }
+      configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+              DistCpUtils.packAttributes(fileAttributes));
+
       copyMapper.setup(context);
       copyMapper.setup(context);
 
 
       for (Path path: pathList) {
       for (Path path: pathList) {
@@ -195,19 +247,29 @@ public class TestCopyMapper {
                 .replaceAll(SOURCE_PATH, TARGET_PATH));
                 .replaceAll(SOURCE_PATH, TARGET_PATH));
         Assert.assertTrue(fs.exists(targetPath));
         Assert.assertTrue(fs.exists(targetPath));
         Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
         Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
-        Assert.assertEquals(fs.getFileStatus(path).getReplication(),
-                fs.getFileStatus(targetPath).getReplication());
-        Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
-                fs.getFileStatus(targetPath).getBlockSize());
-        Assert.assertTrue(!fs.isFile(targetPath) ||
-                fs.getFileChecksum(targetPath).equals(
-                        fs.getFileChecksum(path)));
+        FileStatus sourceStatus = fs.getFileStatus(path);
+        FileStatus targetStatus = fs.getFileStatus(targetPath);
+        Assert.assertEquals(sourceStatus.getReplication(),
+            targetStatus.getReplication());
+        if (preserveChecksum) {
+          Assert.assertEquals(sourceStatus.getBlockSize(),
+              targetStatus.getBlockSize());
+        }
+        Assert.assertTrue(!fs.isFile(targetPath)
+            || fs.getFileChecksum(targetPath).equals(fs.getFileChecksum(path)));
       }
       }
 
 
       Assert.assertEquals(pathList.size(),
       Assert.assertEquals(pathList.size(),
               stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
               stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
-      Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE,
-              stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
+      if (!preserveChecksum) {
+        Assert.assertEquals(nFiles * DEFAULT_FILE_SIZE, stubContext
+            .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
+            .getValue());
+      } else {
+        Assert.assertEquals(nFiles * NON_DEFAULT_BLOCK_SIZE * 2, stubContext
+            .getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED)
+            .getValue());
+      }
 
 
       testCopyingExistingFiles(fs, copyMapper, context);
       testCopyingExistingFiles(fs, copyMapper, context);
       for (Text value : stubContext.getWriter().values()) {
       for (Text value : stubContext.getWriter().values()) {
@@ -309,7 +371,7 @@ public class TestCopyMapper {
       UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
       UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
 
 
       final CopyMapper copyMapper = new CopyMapper();
       final CopyMapper copyMapper = new CopyMapper();
-      
+
       final Mapper<Text, FileStatus, Text, Text>.Context context =  tmpUser.
       final Mapper<Text, FileStatus, Text, Text>.Context context =  tmpUser.
           doAs(new PrivilegedAction<Mapper<Text, FileStatus, Text, Text>.Context>() {
           doAs(new PrivilegedAction<Mapper<Text, FileStatus, Text, Text>.Context>() {
         @Override
         @Override
@@ -535,7 +597,7 @@ public class TestCopyMapper {
 
 
       final Mapper<Text, FileStatus, Text, Text>.Context context
       final Mapper<Text, FileStatus, Text, Text>.Context context
               = stubContext.getContext();
               = stubContext.getContext();
-      
+
       context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
       context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
         DistCpUtils.packAttributes(preserveStatus));
         DistCpUtils.packAttributes(preserveStatus));
 
 

The file diff is too large to display
+ 168 - 168
hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json


+ 7 - 0
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -865,4 +866,10 @@ public class ResourceSchedulerWrapper implements
   public RMContainer getRMContainer(ContainerId containerId) {
     return null;
   }
+
+  @Override
+  public String moveApplication(ApplicationId appId, String newQueue)
+      throws YarnException {
+    return scheduler.moveApplication(appId, newQueue);
+  }
 }

+ 69 - 0
hadoop-yarn-project/CHANGES.txt

@@ -9,6 +9,14 @@ Trunk - Unreleased
     YARN-1496. Protocol additions to allow moving apps between queues (Sandy
     Ryza)
 
+    YARN-1498. Common scheduler changes for moving apps between queues (Sandy
+    Ryza)
+
+    YARN-1504. RM changes for moving apps between queues (Sandy Ryza)
+
+    YARN-1499. Fair Scheduler changes for moving apps between queues (Sandy
+    Ryza)
+
   IMPROVEMENTS
 
   OPTIMIZATIONS
@@ -74,6 +82,27 @@ Release 2.4.0 - UNRELEASED
     YARN-1413. Implemented serving of aggregated-logs in the ApplicationHistory
     server. (Mayank Bansal via vinodkv)
 
+    YARN-1633. Defined user-facing entity, entity-info and event objects related
+    to Application Timeline feature. (Zhijie Shen via vinodkv)
+
+    YARN-1611. Introduced the concept of a configuration provider which can be
+    used by ResourceManager to read configuration locally or from remote systems
+    so as to help RM failover. (Xuan Gong via vinodkv)
+
+    YARN-1659. Defined the ApplicationTimelineStore store as an abstraction for
+    implementing different storage impls for storing timeline information.
+    (Billie Rinaldi via vinodkv)
+
+    YARN-1634. Added a testable in-memory implementation of
+    ApplicationTimelineStore. (Zhijie Shen via vinodkv)
+
+    YARN-1461. Added tags for YARN applications and changed RM to handle them.
+    (Karthik Kambatla via zjshen)
+
+    YARN-1636. Augmented Application-history server's web-services to also expose
+    new APIs for retrieving and storing timeline information. (Zhijie Shen via
+    vinodkv)
+
   IMPROVEMENTS
 
     YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via
@@ -88,6 +117,28 @@ Release 2.4.0 - UNRELEASED
     YARN-1630. Introduce timeout for async polling operations in YarnClientImpl
     (Aditya Acharya via Sandy Ryza)
 
+    YARN-1617. Remove ancient comment and surround LOG.debug in
+    AppSchedulingInfo.allocate (Sandy Ryza)
+
+    YARN-1639. Modified RM HA configuration handling to have a way of not
+    requiring separate configuration files for each RM. (Xuan Gong via vinodkv)
+
+    YARN-1668. Modified RM HA handling of admin-acls to be available across RM
+    failover by making using of a remote configuration-provider. (Xuan Gong via
+    vinodkv)
+
+    YARN-1667. Modified RM HA handling of super users (with proxying ability) to
+    be available across RM failover by making using of a remote
+    configuration-provider. (Xuan Gong via vinodkv)
+
+    YARN-1285. Changed the default value of yarn.acl.enable in yarn-default.xml
+    to be consistent with what exists (false) in the code and documentation.
+    (Kenji Kikushima via vinodkv)
+
+    YARN-1669. Modified RM HA handling of protocol level service-ACLS to
+    be available across RM failover by making using of a remote
+    configuration-provider. (Xuan Gong via vinodkv)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -134,6 +185,15 @@ Release 2.4.0 - UNRELEASED
 
     YARN-1642. RMDTRenewer#getRMClient should use ClientRMProxy (kasha)
 
+    YARN-1632. TestApplicationMasterServices should be under
+    org.apache.hadoop.yarn.server.resourcemanager package (Chen He via jeagles)
+
+    YARN-1673. Fix option parsing in YARN's application CLI after it is broken
+    by YARN-967. (Mayank Bansal via vinodkv)
+
+    YARN-1684. Fixed history server heap size in yarn script. (Billie Rinaldi
+    via zjshen)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -567,6 +627,15 @@ Release 2.3.0 - UNRELEASED
 
     YARN-1629. IndexOutOfBoundsException in MaxRunningAppsEnforcer (Sandy Ryza)
 
+    YARN-1628. Fixed the test failure in TestContainerManagerSecurity. (Vinod
+    Kumar Vavilapalli via zjshen)
+
+    YARN-1661. Fixed DS ApplicationMaster to write the correct exit log. (Vinod
+    Kumar Vavilapalli via zjshen)
+
+    YARN-1689. Made RMAppAttempt get killed when RMApp is at ACCEPTED. (Vinod
+    Kumar Vavilapalli via zjshen)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/bin/yarn

@@ -204,7 +204,7 @@ elif [ "$COMMAND" = "historyserver" ] ; then
   CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
   CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
   YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
-  if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
+  if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
  fi  
 elif [ "$COMMAND" = "nodemanager" ] ; then

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/bin/yarn.cmd

@@ -207,7 +207,7 @@ goto :eof
   set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%\ahs-config\log4j.properties
   set CLASS=org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer
   set YARN_OPTS=%YARN_OPTS% %HADOOP_HISTORYSERVER_OPTS%
-  if defined YARN_RESOURCEMANAGER_HEAPSIZE (
+  if defined YARN_HISTORYSERVER_HEAPSIZE (
     set JAVA_HEAP_MAX=-Xmx%YARN_HISTORYSERVER_HEAPSIZE%m
   )
   goto :eof

+ 38 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/ApplicationsRequestScope.java

@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Enumeration that controls the scope of applications fetched
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public enum ApplicationsRequestScope {
+  /** All jobs */
+  ALL,
+
+  /** Jobs viewable by current user */
+  VIEWABLE,
+
+  /** Jobs owned by current user */
+  OWN
+}

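The enum itself carries no behaviour; it is meant to be attached to the application-listing request so the RM can filter what it returns. A hedged sketch of the intended use, assuming the companion GetApplicationsRequest change (not included in this truncated listing) exposes a newInstance overload that accepts the scope:

    import org.apache.hadoop.yarn.api.protocolrecords.ApplicationsRequestScope;
    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;

    public class ScopedListingExample {
      public static GetApplicationsRequest ownAppsRequest() {
        // Assumption: GetApplicationsRequest.newInstance(ApplicationsRequestScope)
        // exists once the rest of this feature lands; here we ask only for the
        // caller's own applications instead of every application in the RM.
        return GetApplicationsRequest.newInstance(ApplicationsRequestScope.OWN);
      }
    }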
Some files are not shown because too many files changed in this commit