
Merge r1555021 through r1563384 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1563385 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 11 years ago
commit 35e59eeaa2
58 changed files with 4035 additions and 610 deletions
  1. 124 0
      dev-support/create-release.sh
  2. 2 2
      dev-support/test-patch.sh
  3. 1 0
      hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/InterfaceStability.java
  4. 8 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  5. 2 0
      hadoop-common-project/hadoop-common/pom.xml
  6. 2 0
      hadoop-common-project/hadoop-common/src/CMakeLists.txt
  7. 10 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java
  8. 90 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.java
  9. 50 134
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java
  10. 478 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java
  11. 125 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java
  12. 38 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c
  13. 162 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.c
  14. 247 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocketWatcher.c
  15. 82 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestSharedFileDescriptorFactory.java
  16. 150 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java
  17. 26 2
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java
  18. 7 5
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
  19. 13 1
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java
  20. 25 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java
  21. 9 0
      hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java
  22. 14 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  23. 2 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  24. 302 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/ShortCircuitSharedMemorySegment.java
  25. 21 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  26. 11 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
  27. 94 71
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
  28. 5 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
  29. 25 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  30. 7 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
  31. 14 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  32. 104 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/client/TestShortCircuitSharedMemorySegment.java
  33. 71 165
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java
  34. 16 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
  35. 205 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java
  36. 42 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesBlockReportPerStorage.java
  37. 39 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesCombinedBlockReport.java
  38. 168 168
      hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json
  39. 7 0
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
  40. 8 0
      hadoop-yarn-project/CHANGES.txt
  41. 88 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEntities.java
  42. 314 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEntity.java
  43. 134 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEvent.java
  44. 189 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEvents.java
  45. 21 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/package-info.java
  46. 113 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java
  47. 70 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
  48. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java
  49. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java
  50. 32 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
  51. 44 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMoveEvent.java
  52. 9 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
  53. 14 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java
  54. 2 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
  55. 1 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
  56. 1 37
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java
  57. 15 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
  58. 180 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestMoveApplication.java

+ 124 - 0
dev-support/create-release.sh

@@ -0,0 +1,124 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Function to probe the exit code of the script commands, 
+# and stop in the case of failure with a contextual error
+# message.
+run() {
+  echo "\$ ${@}"
+  "${@}"
+  exitCode=$?
+  if [[ $exitCode != 0 ]]; then
+    echo
+    echo "Failed! running ${@} in `pwd`"
+    echo
+    exit $exitCode
+  fi
+}
+
+doMD5() {
+  MD5CMD="md5sum"
+  which $MD5CMD
+  if [[ $? != 0 ]]; then
+    MD5CMD="md5"
+  fi
+  run $MD5CMD ${1} > ${1}.md5
+}
+
+# If provided, the created release artifacts will be tagged with it 
+# (use RC#, e.g. RC0). Do not use a label to create the final release 
+# artifact.
+RC_LABEL=$1
+
+# Extract Hadoop version from POM
+HADOOP_VERSION=`cat pom.xml | grep "<version>" | head -1 | sed 's|^ *<version>||' | sed 's|</version>.*$||'`
+
+echo
+echo "*****************************************************************"
+echo
+echo "Hadoop version to create release artifacts: ${HADOOP_VERSION}"
+echo 
+echo "Release Candidate Label: ${RC_LABEL}"
+echo
+echo "*****************************************************************"
+echo
+
+if [[ ! -z ${RC_LABEL} ]]; then
+  RC_LABEL="-${RC_LABEL}"
+fi
+
+# Get Maven command
+if [ -z "$MAVEN_HOME" ]; then
+  MVN=mvn
+else
+  MVN=$MAVEN_HOME/bin/mvn
+fi
+
+ARTIFACTS_DIR="target/artifacts"
+
+# Create staging dir for release artifacts
+
+run mkdir -p ${ARTIFACTS_DIR}
+
+# Create RAT report
+run ${MVN} apache-rat:check
+
+# Create SRC and BIN tarballs for release,
+# Using 'install' goal instead of 'package' so artifacts are available 
+# in the Maven local cache for the site generation
+run ${MVN} install -Pdist,docs,src,native -DskipTests -Dtar
+
+# Create site for release
+run ${MVN} site site:stage -Pdist -Psrc
+run mv target/staging/hadoop-project target/r${HADOOP_VERSION}/
+run cd target/
+run tar czf hadoop-site-${HADOOP_VERSION}.tar.gz r${HADOOP_VERSION}/*
+run cd ..
+
+# Stage RAT report
+find . -name rat.txt | xargs -I% cat % > ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-rat.txt
+
+# Stage CHANGES.txt files
+run cp ./hadoop-common-project/hadoop-common/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-COMMON-${HADOOP_VERSION}${RC_LABEL}.txt
+run cp ./hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-HDFS-${HADOOP_VERSION}${RC_LABEL}.txt
+run cp ./hadoop-mapreduce-project/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-MAPREDUCE-${HADOOP_VERSION}${RC_LABEL}.txt
+run cp ./hadoop-yarn-project/CHANGES.txt ${ARTIFACTS_DIR}/CHANGES-YARN-${HADOOP_VERSION}${RC_LABEL}.txt
+
+# Stage BIN tarball
+run mv hadoop-dist/target/hadoop-${HADOOP_VERSION}.tar.gz ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}.tar.gz
+
+# Stage SRC tarball
+run mv hadoop-dist/target/hadoop-${HADOOP_VERSION}-src.tar.gz ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-src.tar.gz
+
+# Stage SITE tarball
+run mv target/hadoop-site-${HADOOP_VERSION}.tar.gz ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-site.tar.gz
+
+# MD5 SRC and BIN tarballs
+doMD5 ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}.tar.gz
+doMD5 ${ARTIFACTS_DIR}/hadoop-${HADOOP_VERSION}${RC_LABEL}-src.tar.gz
+
+run cd ${ARTIFACTS_DIR}
+ARTIFACTS_DIR=`pwd`
+echo
+echo "Congratulations, you have successfully built the release"
+echo "artifacts for Apache Hadoop ${HADOOP_VERSION}${RC_LABEL}"
+echo
+echo "The artifacts for this run are available at ${ARTIFACTS_DIR}:"
+run ls -1 ${ARTIFACTS_DIR}
+echo 
+echo "Remember to sign them before staging them on the open"
+echo

+ 2 - 2
dev-support/test-patch.sh

@@ -425,9 +425,9 @@ checkJavadocWarnings () {
   echo ""
   echo "There appear to be $javadocWarnings javadoc warnings generated by the patched build."
 
-  #There are 12 warnings that are caused by things that are caused by using sun internal APIs.
+  #There are 14 warnings that are caused by things that are caused by using sun internal APIs.
   #There are 2 warnings that are caused by the Apache DS Dn class used in MiniKdc.
-  OK_JAVADOC_WARNINGS=14;
+  OK_JAVADOC_WARNINGS=16;
   ### if current warnings greater than OK_JAVADOC_WARNINGS
   if [[ $javadocWarnings -ne $OK_JAVADOC_WARNINGS ]] ; then
     JIRA_COMMENT="$JIRA_COMMENT

+ 1 - 0
hadoop-common-project/hadoop-annotations/src/main/java/org/apache/hadoop/classification/InterfaceStability.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
  * <li>Classes that are {@link Private} are to be considered unstable unless
  * a different InterfaceStability annotation states otherwise.</li>
  * <li>Incompatible changes must not be made to classes marked as stable.</li>
+ * </ul>
  */
 @InterfaceAudience.Public
 @InterfaceStability.Evolving

+ 8 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -311,6 +311,9 @@ Release 2.4.0 - UNRELEASED
 
   BUG FIXES
 
+    HADOOP-10320. Javadoc in InterfaceStability.java lacks final </ul>.
+    (René Nyffenegger via cnauroth)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -470,6 +473,9 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10317. Rename branch-2.3 release version from 2.4.0-SNAPSHOT
     to 2.3.0-SNAPSHOT. (wang)
 
+    HADOOP-10313. Script and jenkins job to produce Hadoop release artifacts. 
+    (tucu)
+
   OPTIMIZATIONS
 
     HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who"
@@ -677,6 +683,8 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10310. SaslRpcServer should be initialized even when no secret
     manager present. (atm)
 
+    HADOOP-10311. Cleanup vendor names from the code base. (tucu)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-common-project/hadoop-common/pom.xml

@@ -543,6 +543,7 @@
                     <javahClassName>org.apache.hadoop.io.compress.bzip2.Bzip2Decompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.security.JniBasedUnixGroupsMapping</javahClassName>
                     <javahClassName>org.apache.hadoop.io.nativeio.NativeIO</javahClassName>
+                    <javahClassName>org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory</javahClassName>
                     <javahClassName>org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyCompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
@@ -550,6 +551,7 @@
                     <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
                     <javahClassName>org.apache.hadoop.net.unix.DomainSocket</javahClassName>
+                    <javahClassName>org.apache.hadoop.net.unix.DomainSocketWatcher</javahClassName>
                   </javahClassNames>
                   <javahOutputDirectory>${project.build.directory}/native/javah</javahOutputDirectory>
                 </configuration>

+ 2 - 0
hadoop-common-project/hadoop-common/src/CMakeLists.txt

@@ -178,7 +178,9 @@ add_dual_library(hadoop
     ${D}/io/nativeio/NativeIO.c
     ${D}/io/nativeio/errno_enum.c
     ${D}/io/nativeio/file_descriptor.c
+    ${D}/io/nativeio/SharedFileDescriptorFactory.c
     ${D}/net/unix/DomainSocket.c
+    ${D}/net/unix/DomainSocketWatcher.c
     ${D}/security/JniBasedUnixGroupsMapping.c
     ${D}/security/JniBasedUnixGroupsNetgroupMapping.c
     ${D}/security/hadoop_group_info.c

+ 10 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java

@@ -487,6 +487,16 @@ public class NativeIO {
       new ConcurrentHashMap<Integer, CachedName>();
 
     private enum IdCache { USER, GROUP }
+
+    public final static int MMAP_PROT_READ = 0x1; 
+    public final static int MMAP_PROT_WRITE = 0x2; 
+    public final static int MMAP_PROT_EXEC = 0x4; 
+
+    public static native long mmap(FileDescriptor fd, int prot,
+        boolean shared, long length) throws IOException;
+
+    public static native void munmap(long addr, long length)
+        throws IOException;
   }
 
   private static boolean workaroundNonThreadSafePasswdCalls = false;
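
For illustration only (not part of this change): a minimal sketch of how the new POSIX.mmap/munmap natives added above might be called. It assumes libhadoop is loaded (NativeIO.isAvailable()) on a UNIX platform; the file path and length are placeholders.

    import java.io.FileDescriptor;
    import java.io.RandomAccessFile;
    import org.apache.hadoop.io.nativeio.NativeIO;

    public class MmapSketch {
      public static void main(String[] args) throws Exception {
        // Placeholder scratch file; any readable/writable descriptor works.
        RandomAccessFile raf = new RandomAccessFile("/tmp/mmap-demo", "rw");
        try {
          raf.setLength(4096);
          FileDescriptor fd = raf.getFD();
          long addr = NativeIO.POSIX.mmap(fd,
              NativeIO.POSIX.MMAP_PROT_READ | NativeIO.POSIX.MMAP_PROT_WRITE,
              true /* shared mapping */, 4096);
          // The raw address can be handed to other native code; Java itself
          // cannot dereference it directly.
          NativeIO.POSIX.munmap(addr, 4096);
        } finally {
          raf.close();
        }
      }
    }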

+ 90 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.java

@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.nativeio;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.FileDescriptor;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A factory for creating shared file descriptors inside a given directory.
+ * Typically, the directory will be /dev/shm or /tmp.
+ *
+ * We will hand out file descriptors that correspond to unlinked files residing
+ * in that directory.  These file descriptors are suitable for sharing across
+ * multiple processes and are both readable and writable.
+ *
+ * Because we unlink the temporary files right after creating them, a JVM crash
+ * usually does not leave behind any temporary files in the directory.  However,
+ * it may happen that we crash right after creating the file and before
+ * unlinking it.  In the constructor, we attempt to clean up after any such
+ * remnants by trying to unlink any temporary files created by previous
+ * SharedFileDescriptorFactory instances that also used our prefix.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class SharedFileDescriptorFactory {
+  private final String prefix;
+  private final String path;
+
+  /**
+   * Create a SharedFileDescriptorFactory.
+   *
+   * @param prefix    Prefix to add to all file names we use.
+   * @param path      Path to use.
+   */
+  public SharedFileDescriptorFactory(String prefix, String path)
+      throws IOException {
+    Preconditions.checkArgument(NativeIO.isAvailable());
+    Preconditions.checkArgument(SystemUtils.IS_OS_UNIX);
+    this.prefix = prefix;
+    this.path = path;
+    deleteStaleTemporaryFiles0(prefix, path);
+  }
+
+  /**
+   * Create a shared file descriptor which will be both readable and writable.
+   *
+   * @param length         The starting file length.
+   *
+   * @return               The file descriptor, wrapped in a FileInputStream.
+   * @throws IOException   If there was an I/O or configuration error creating
+   *                       the descriptor.
+   */
+  public FileInputStream createDescriptor(int length) throws IOException {
+    return new FileInputStream(createDescriptor0(prefix, path, length));
+  }
+
+  /**
+   * Delete temporary files in the directory, NOT following symlinks.
+   */
+  private static native void deleteStaleTemporaryFiles0(String prefix,
+      String path) throws IOException;
+
+  /**
+   * Create a file with O_EXCL, and then resize it to the desired size.
+   */
+  private static native FileDescriptor createDescriptor0(String prefix,
+      String path, int length) throws IOException;
+}
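
For context, a hypothetical usage sketch of the factory (not part of this change). It assumes libhadoop is available and a writable /dev/shm; the prefix, path, and size are placeholders.

    import java.io.FileInputStream;
    import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory;

    public class ShmSketch {
      public static void main(String[] args) throws Exception {
        // Descriptors handed out refer to files that are already unlinked,
        // so a JVM crash normally leaves nothing behind in the directory.
        SharedFileDescriptorFactory factory =
            new SharedFileDescriptorFactory("demo_", "/dev/shm");  // placeholder prefix/path
        FileInputStream shm = factory.createDescriptor(8192);
        try {
          // The descriptor starts as an 8192-byte, all-zero, unlinked file.
          int firstByte = shm.read();   // reads 0
          // In practice it would be mmapped via NativeIO.POSIX.mmap or passed
          // to another process over a DomainSocket.
          System.out.println("first byte: " + firstByte);
        } finally {
          shm.close();
        }
      }
    }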

+ 50 - 134
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java

@@ -24,17 +24,15 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.net.SocketException;
-import java.nio.channels.AsynchronousCloseException;
 import java.nio.channels.ClosedChannelException;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.ByteBuffer;
-import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.lang.SystemUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.NativeCodeLoader;
+import org.apache.hadoop.util.CloseableReferenceCount;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -132,104 +130,14 @@ public class DomainSocket implements Closeable {
   }
 
   /**
-   * Tracks the reference count of the file descriptor, and also whether it is
-   * open or closed.
+   * The socket reference count and closed bit.
    */
-  private static class Status {
-    /**
-     * Bit mask representing a closed domain socket. 
-     */
-    private static final int STATUS_CLOSED_MASK = 1 << 30;
-    
-    /**
-     * Status bits
-     * 
-     * Bit 30: 0 = DomainSocket open, 1 = DomainSocket closed
-     * Bits 29 to 0: the reference count.
-     */
-    private final AtomicInteger bits = new AtomicInteger(0);
-
-    Status() { }
-
-    /**
-     * Increment the reference count of the underlying file descriptor.
-     *
-     * @throws ClosedChannelException      If the file descriptor is closed.
-     */
-    void reference() throws ClosedChannelException {
-      int curBits = bits.incrementAndGet();
-      if ((curBits & STATUS_CLOSED_MASK) != 0) {
-        bits.decrementAndGet();
-        throw new ClosedChannelException();
-      }
-    }
-
-    /**
-     * Decrement the reference count of the underlying file descriptor.
-     *
-     * @param checkClosed        Whether to throw an exception if the file
-     *                           descriptor is closed.
-     *
-     * @throws AsynchronousCloseException  If the file descriptor is closed and
-     *                                     checkClosed is set.
-     */
-    void unreference(boolean checkClosed) throws AsynchronousCloseException {
-      int newCount = bits.decrementAndGet();
-      assert (newCount & ~STATUS_CLOSED_MASK) >= 0;
-      if (checkClosed && ((newCount & STATUS_CLOSED_MASK) != 0)) {
-        throw new AsynchronousCloseException();
-      }
-    }
-
-    /**
-     * Return true if the file descriptor is currently open.
-     * 
-     * @return                 True if the file descriptor is currently open.
-     */
-    boolean isOpen() {
-      return ((bits.get() & STATUS_CLOSED_MASK) == 0);
-    }
-
-    /**
-     * Mark the file descriptor as closed.
-     *
-     * Once the file descriptor is closed, it cannot be reopened.
-     *
-     * @return                         The current reference count.
-     * @throws ClosedChannelException  If someone else closes the file 
-     *                                 descriptor before we do.
-     */
-    int setClosed() throws ClosedChannelException {
-      while (true) {
-        int curBits = bits.get();
-        if ((curBits & STATUS_CLOSED_MASK) != 0) {
-          throw new ClosedChannelException();
-        }
-        if (bits.compareAndSet(curBits, curBits | STATUS_CLOSED_MASK)) {
-          return curBits & (~STATUS_CLOSED_MASK);
-        }
-      }
-    }
-
-    /**
-     * Get the current reference count.
-     *
-     * @return                 The current reference count.
-     */
-    int getReferenceCount() {
-      return bits.get() & (~STATUS_CLOSED_MASK);
-    }
-  }
-
-  /**
-   * The socket status.
-   */
-  private final Status status;
+  final CloseableReferenceCount refCount;
 
   /**
    * The file descriptor associated with this UNIX domain socket.
    */
-  private final int fd;
+  final int fd;
 
   /**
    * The path associated with this UNIX domain socket.
@@ -252,13 +160,21 @@ public class DomainSocket implements Closeable {
   private final DomainChannel channel = new DomainChannel();
 
   private DomainSocket(String path, int fd) {
-    this.status = new Status();
+    this.refCount = new CloseableReferenceCount();
     this.fd = fd;
     this.path = path;
   }
 
   private static native int bind0(String path) throws IOException;
 
+  private void unreference(boolean checkClosed) throws ClosedChannelException {
+    if (checkClosed) {
+      refCount.unreferenceCheckClosed();
+    } else {
+      refCount.unreference();
+    }
+  }
+
   /**
    * Create a new DomainSocket listening on the given path.
    *
@@ -308,14 +224,14 @@ public class DomainSocket implements Closeable {
    * @throws SocketTimeoutException       If the accept timed out.
    */
   public DomainSocket accept() throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       DomainSocket ret = new DomainSocket(path, accept0(fd));
       exc = false;
       return ret;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
@@ -335,14 +251,14 @@ public class DomainSocket implements Closeable {
     return new DomainSocket(path, fd);
   }
 
- /**
-  * Return true if the file descriptor is currently open.
-  *
-  * @return                 True if the file descriptor is currently open.
-  */
- public boolean isOpen() {
-   return status.isOpen();
- }
+  /**
+   * Return true if the file descriptor is currently open.
+   *
+   * @return                 True if the file descriptor is currently open.
+   */
+  public boolean isOpen() {
+    return refCount.isOpen();
+  }
 
   /**
    * @return                 The socket path.
@@ -381,20 +297,20 @@ public class DomainSocket implements Closeable {
       throws IOException;
 
   public void setAttribute(int type, int size) throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       setAttribute0(fd, type, size);
       exc = false;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
   private native int getAttribute0(int fd, int type) throws IOException;
 
   public int getAttribute(int type) throws IOException {
-    status.reference();
+    refCount.reference();
     int attribute;
     boolean exc = true;
     try {
@@ -402,7 +318,7 @@ public class DomainSocket implements Closeable {
       exc = false;
       return attribute;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
@@ -419,9 +335,9 @@ public class DomainSocket implements Closeable {
   @Override
   public void close() throws IOException {
     // Set the closed bit on this DomainSocket
-    int refCount;
+    int count;
     try {
-      refCount = status.setClosed();
+      count = refCount.setClosed();
     } catch (ClosedChannelException e) {
       // Someone else already closed the DomainSocket.
       return;
@@ -429,7 +345,7 @@ public class DomainSocket implements Closeable {
     // Wait for all references to go away
     boolean didShutdown = false;
     boolean interrupted = false;
-    while (refCount > 0) {
+    while (count > 0) {
       if (!didShutdown) {
         try {
           // Calling shutdown on the socket will interrupt blocking system
@@ -446,7 +362,7 @@ public class DomainSocket implements Closeable {
       } catch (InterruptedException e) {
         interrupted = true;
       }
-      refCount = status.getReferenceCount();
+      count = refCount.getReferenceCount();
     }
 
     // At this point, nobody has a reference to the file descriptor, 
@@ -478,13 +394,13 @@ public class DomainSocket implements Closeable {
    */
   public void sendFileDescriptors(FileDescriptor descriptors[],
       byte jbuf[], int offset, int length) throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       sendFileDescriptors0(fd, descriptors, jbuf, offset, length);
       exc = false;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
@@ -515,14 +431,14 @@ public class DomainSocket implements Closeable {
    */
   public int receiveFileDescriptors(FileDescriptor[] descriptors,
       byte jbuf[], int offset, int length) throws IOException {
-    status.reference();
+    refCount.reference();
     boolean exc = true;
     try {
       int nBytes = receiveFileDescriptors0(fd, descriptors, jbuf, offset, length);
       exc = false;
       return nBytes;
     } finally {
-      status.unreference(exc);
+      unreference(exc);
     }
   }
 
@@ -539,7 +455,7 @@ public class DomainSocket implements Closeable {
     for (int i = 0; i < streams.length; i++) {
       streams[i] = null;
     }
-    status.reference();
+    refCount.reference();
     try {
       int ret = receiveFileDescriptors0(fd, descriptors, buf, offset, length);
       for (int i = 0, j = 0; i < descriptors.length; i++) {
@@ -569,7 +485,7 @@ public class DomainSocket implements Closeable {
           }
         }
       }
-      status.unreference(!success);
+      unreference(!success);
     }
   }
 
@@ -593,7 +509,7 @@ public class DomainSocket implements Closeable {
   public class DomainInputStream extends InputStream {
     @Override
     public int read() throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         byte b[] = new byte[1];
@@ -601,33 +517,33 @@ public class DomainSocket implements Closeable {
         exc = false;
         return (ret >= 0) ? b[0] : -1;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
     
     @Override
     public int read(byte b[], int off, int len) throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         int nRead = DomainSocket.readArray0(DomainSocket.this.fd, b, off, len);
         exc = false;
         return nRead;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
 
     @Override
     public int available() throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         int nAvailable = DomainSocket.available0(DomainSocket.this.fd);
         exc = false;
         return nAvailable;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
 
@@ -649,7 +565,7 @@ public class DomainSocket implements Closeable {
 
     @Override
     public void write(int val) throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         byte b[] = new byte[1];
@@ -657,19 +573,19 @@ public class DomainSocket implements Closeable {
         DomainSocket.writeArray0(DomainSocket.this.fd, b, 0, 1);
         exc = false;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
 
     @Override
     public void write(byte[] b, int off, int len) throws IOException {
-      status.reference();
-        boolean exc = true;
+      refCount.reference();
+      boolean exc = true;
       try {
         DomainSocket.writeArray0(DomainSocket.this.fd, b, off, len);
         exc = false;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
   }
@@ -688,7 +604,7 @@ public class DomainSocket implements Closeable {
 
     @Override
     public int read(ByteBuffer dst) throws IOException {
-      status.reference();
+      refCount.reference();
       boolean exc = true;
       try {
         int nread = 0;
@@ -710,7 +626,7 @@ public class DomainSocket implements Closeable {
         exc = false;
         return nread;
       } finally {
-        status.unreference(exc);
+        unreference(exc);
       }
     }
   }
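
For illustration (not part of this change): a sketch of passing one of the factory's shared descriptors over a UNIX domain socket, using a socketpair in place of a real client/server connection. The prefix, path, and lengths are placeholders, and it assumes libhadoop is loaded.

    import java.io.FileDescriptor;
    import java.io.FileInputStream;
    import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory;
    import org.apache.hadoop.net.unix.DomainSocket;

    public class FdPassingSketch {
      public static void main(String[] args) throws Exception {
        // A connected pair stands in for a real client/server connection.
        DomainSocket pair[] = DomainSocket.socketpair();
        SharedFileDescriptorFactory factory =
            new SharedFileDescriptorFactory("demo_", "/dev/shm");  // placeholder prefix/path
        FileInputStream shm = factory.createDescriptor(8192);

        // At least one byte of ordinary data accompanies the descriptor.
        byte buf[] = new byte[] { 1 };
        pair[0].sendFileDescriptors(
            new FileDescriptor[] { shm.getFD() }, buf, 0, buf.length);

        FileDescriptor received[] = new FileDescriptor[1];
        pair[1].receiveFileDescriptors(received, buf, 0, buf.length);
        // received[0] now refers to the same unlinked file as shm.

        shm.close();
        pair[0].close();
        pair[1].close();
      }
    }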

+ 478 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java

@@ -0,0 +1,478 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.io.Closeable;
+import java.io.EOFException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.IOUtils;
+
+import java.io.IOException;
+import java.nio.channels.ClosedChannelException;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.TreeMap;
+import java.util.Map;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.NativeCodeLoader;
+
+import com.google.common.base.Preconditions;
+import com.google.common.util.concurrent.Uninterruptibles;
+
+/**
+ * The DomainSocketWatcher watches a set of domain sockets to see when they
+ * become readable, or closed.  When one of those events happens, it makes a
+ * callback.
+ *
+ * See {@link DomainSocket} for more information about UNIX domain sockets.
+ */
+@InterfaceAudience.LimitedPrivate("HDFS")
+public final class DomainSocketWatcher extends Thread implements Closeable {
+  static {
+    if (SystemUtils.IS_OS_WINDOWS) {
+      loadingFailureReason = "UNIX Domain sockets are not available on Windows.";
+    } else if (!NativeCodeLoader.isNativeCodeLoaded()) {
+      loadingFailureReason = "libhadoop cannot be loaded.";
+    } else {
+      String problem;
+      try {
+        anchorNative();
+        problem = null;
+      } catch (Throwable t) {
+        problem = "DomainSocketWatcher#anchorNative got error: " +
+          t.getMessage();
+      }
+      loadingFailureReason = problem;
+    }
+  }
+
+  static Log LOG = LogFactory.getLog(DomainSocketWatcher.class);
+
+  /**
+   * The reason why DomainSocketWatcher is not available, or null if it is
+   * available.
+   */
+  private final static String loadingFailureReason;
+
+  /**
+   * Initializes the native library code.
+   */
+  private static native void anchorNative();
+
+  interface Handler {
+    /**
+     * Handles an event on a socket.  An event may be the socket becoming
+     * readable, or the remote end being closed.
+     *
+     * @param sock    The socket that the event occurred on.
+     * @return        Whether we should close the socket.
+     */
+    boolean handle(DomainSocket sock);
+  }
+
+  /**
+   * Handler for {DomainSocketWatcher#notificationSockets[1]}
+   */
+  private class NotificationHandler implements Handler {
+    public boolean handle(DomainSocket sock) {
+      try {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": NotificationHandler: doing a read on " +
+            sock.fd);
+        }
+        if (sock.getInputStream().read() == -1) {
+          if (LOG.isTraceEnabled()) {
+            LOG.trace(this + ": NotificationHandler: got EOF on " + sock.fd);
+          }
+          throw new EOFException();
+        }
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": NotificationHandler: read succeeded on " +
+            sock.fd);
+        }
+        return false;
+      } catch (IOException e) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": NotificationHandler: setting closed to " +
+              "true for " + sock.fd);
+        }
+        closed = true;
+        return true;
+      }
+    }
+  }
+
+  private static class Entry {
+    final DomainSocket socket;
+    final Handler handler;
+
+    Entry(DomainSocket socket, Handler handler) {
+      this.socket = socket;
+      this.handler = handler;
+    }
+
+    DomainSocket getDomainSocket() {
+      return socket;
+    }
+
+    Handler getHandler() {
+      return handler;
+    }
+  }
+
+  /**
+   * The FdSet is a set of file descriptors that gets passed to poll(2).
+   * It contains a native memory segment, so that we don't have to copy
+   * in the poll0 function.
+   */
+  private static class FdSet {
+    private long data;
+
+    private native static long alloc0();
+
+    FdSet() {
+      data = alloc0();
+    }
+
+    /**
+     * Add a file descriptor to the set.
+     *
+     * @param fd   The file descriptor to add.
+     */
+    native void add(int fd);
+
+    /**
+     * Remove a file descriptor from the set.
+     *
+     * @param fd   The file descriptor to remove.
+     */
+    native void remove(int fd);
+
+    /**
+     * Get an array containing all the FDs marked as readable.
+     * Also clear the state of all FDs.
+     *
+     * @return     An array containing all of the currently readable file
+     *             descriptors.
+     */
+    native int[] getAndClearReadableFds();
+
+    /**
+     * Close the object and de-allocate the memory used.
+     */
+    native void close();
+  }
+
+  /**
+   * Lock which protects toAdd, toRemove, and closed.
+   */
+  private final ReentrantLock lock = new ReentrantLock();
+
+  /**
+   * Condition variable which indicates that toAdd and toRemove have been
+   * processed.
+   */
+  private final Condition processedCond = lock.newCondition();
+
+  /**
+   * Entries to add.
+   */
+  private final LinkedList<Entry> toAdd =
+      new LinkedList<Entry>();
+
+  /**
+   * Entries to remove.
+   */
+  private final TreeMap<Integer, DomainSocket> toRemove =
+      new TreeMap<Integer, DomainSocket>();
+
+  /**
+   * Maximum length of time to go between checking whether the interrupted
+   * bit has been set for this thread.
+   */
+  private final int interruptCheckPeriodMs;
+
+  /**
+   * A pair of sockets used to wake up the thread after it has called poll(2).
+   */
+  private final DomainSocket notificationSockets[];
+
+  /**
+   * Whether or not this DomainSocketWatcher is closed.
+   */
+  private boolean closed = false;
+
+  public DomainSocketWatcher(int interruptCheckPeriodMs) throws IOException {
+    if (loadingFailureReason != null) {
+      throw new UnsupportedOperationException(loadingFailureReason);
+    }
+    notificationSockets = DomainSocket.socketpair();
+    this.interruptCheckPeriodMs = interruptCheckPeriodMs;
+    Preconditions.checkArgument(interruptCheckPeriodMs > 0);
+    watcherThread.start();
+  }
+
+  /**
+   * Close the DomainSocketWatcher and wait for its thread to terminate.
+   *
+   * If there is more than one close, all but the first will be ignored.
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      lock.lock();
+      if (closed) return;
+      LOG.info(this + ": closing");
+      closed = true;
+    } finally {
+      lock.unlock();
+    }
+    // Close notificationSockets[0], so that notificationSockets[1] gets an EOF
+    // event.  This will wake up the thread immediately if it is blocked inside
+    // the select() system call.
+    notificationSockets[0].close();
+    // Wait for the select thread to terminate.
+    Uninterruptibles.joinUninterruptibly(watcherThread);
+  }
+
+  /**
+   * Add a socket.
+   *
+   * @param sock     The socket to add.  It is an error to re-add a socket that
+   *                   we are already watching.
+   * @param handler  The handler to associate with this socket.  This may be
+   *                   called any time after this function is called.
+   */
+  public void add(DomainSocket sock, Handler handler) {
+    try {
+      lock.lock();
+      checkNotClosed();
+      Entry entry = new Entry(sock, handler);
+      try {
+        sock.refCount.reference();
+      } catch (ClosedChannelException e) {
+        Preconditions.checkArgument(false,
+            "tried to add a closed DomainSocket to " + this);
+      }
+      toAdd.add(entry);
+      kick();
+      while (true) {
+        try {
+          processedCond.await();
+        } catch (InterruptedException e) {
+          this.interrupt();
+        }
+        if (!toAdd.contains(entry)) {
+          break;
+        }
+        checkNotClosed();
+      }
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  /**
+   * Remove a socket.  Its handler will be called.
+   *
+   * @param sock     The socket to remove.
+   */
+  public void remove(DomainSocket sock) {
+    try {
+      lock.lock();
+      checkNotClosed();
+      toRemove.put(sock.fd, sock);
+      kick();
+      while (true) {
+        try {
+          processedCond.await();
+        } catch (InterruptedException e) {
+          this.interrupt();
+        }
+        if (!toRemove.containsKey(sock.fd)) {
+          break;
+        }
+        checkNotClosed();
+      }
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  /**
+   * Wake up the DomainSocketWatcher thread.
+   */
+  private void kick() {
+    try {
+      notificationSockets[0].getOutputStream().write(0);
+    } catch (IOException e) {
+      LOG.error(this + ": error writing to notificationSockets[0]", e);
+    }
+  }
+
+  /**
+   * Check that the DomainSocketWatcher is not closed.
+   * Must be called while holding the lock.
+   */
+  private void checkNotClosed() {
+    Preconditions.checkState(lock.isHeldByCurrentThread());
+    if (closed) {
+      throw new RuntimeException("DomainSocketWatcher is closed.");
+    }
+  }
+
+  private void sendCallback(String caller, TreeMap<Integer, Entry> entries,
+      FdSet fdSet, int fd) {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(this + ": " + caller + " starting sendCallback for fd " + fd);
+    }
+    Entry entry = entries.get(fd);
+    Preconditions.checkNotNull(entry,
+        this + ": fdSet contained " + fd + ", which we were " +
+        "not tracking.");
+    DomainSocket sock = entry.getDomainSocket();
+    if (entry.getHandler().handle(sock)) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(this + ": " + caller + ": closing fd " + fd +
+            " at the request of the handler.");
+      }
+      if (toRemove.remove(fd) != null) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(this + ": " + caller + " : sendCallback processed fd " +
+            fd  + " in toRemove.");
+        }
+      }
+      try {
+        sock.refCount.unreferenceCheckClosed();
+      } catch (IOException e) {
+        Preconditions.checkArgument(false,
+            this + ": file descriptor " + sock.fd + " was closed while " +
+            "still in the poll(2) loop.");
+      }
+      IOUtils.cleanup(LOG, sock);
+      entries.remove(fd);
+      fdSet.remove(fd);
+    } else {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace(this + ": " + caller + ": sendCallback not " +
+            "closing fd " + fd);
+      }
+    }
+  }
+
+  private final Thread watcherThread = new Thread(new Runnable() {
+    @Override
+    public void run() {
+      LOG.info(this + ": starting with interruptCheckPeriodMs = " +
+          interruptCheckPeriodMs);
+      final TreeMap<Integer, Entry> entries = new TreeMap<Integer, Entry>();
+      FdSet fdSet = new FdSet();
+      addNotificationSocket(entries, fdSet);
+      try {
+        while (true) {
+          lock.lock();
+          try {
+            for (int fd : fdSet.getAndClearReadableFds()) {
+              sendCallback("getAndClearReadableFds", entries, fdSet, fd);
+            }
+            if (!(toAdd.isEmpty() && toRemove.isEmpty())) {
+              // Handle pending additions (before pending removes).
+              for (Iterator<Entry> iter = toAdd.iterator(); iter.hasNext(); ) {
+                Entry entry = iter.next();
+                DomainSocket sock = entry.getDomainSocket();
+                Entry prevEntry = entries.put(sock.fd, entry);
+                Preconditions.checkState(prevEntry == null,
+                    this + ": tried to watch a file descriptor that we " +
+                    "were already watching: " + sock);
+                if (LOG.isTraceEnabled()) {
+                  LOG.trace(this + ": adding fd " + sock.fd);
+                }
+                fdSet.add(sock.fd);
+                iter.remove();
+              }
+              // Handle pending removals
+              while (true) {
+                Map.Entry<Integer, DomainSocket> entry = toRemove.firstEntry();
+                if (entry == null) break;
+                sendCallback("handlePendingRemovals",
+                    entries, fdSet, entry.getValue().fd);
+              }
+              processedCond.signalAll();
+            }
+            // Check if the thread should terminate.  Doing this check now is
+            // easier than at the beginning of the loop, since we know toAdd and
+            // toRemove are now empty and processedCond has been notified if it
+            // needed to be.
+            if (closed) {
+              LOG.info(toString() + " thread terminating.");
+              return;
+            }
+            // Check if someone sent our thread an InterruptedException while we
+            // were waiting in poll().
+            if (Thread.interrupted()) {
+              throw new InterruptedException();
+            }
+          } finally {
+            lock.unlock();
+          }
+          doPoll0(interruptCheckPeriodMs, fdSet);
+        }
+      } catch (InterruptedException e) {
+        LOG.info(toString() + " terminating on InterruptedException");
+      } catch (IOException e) {
+        LOG.error(toString() + " terminating on IOException", e);
+      } finally {
+        for (Entry entry : entries.values()) {
+          sendCallback("close", entries, fdSet, entry.getDomainSocket().fd);
+        }
+        entries.clear();
+        fdSet.close();
+      }
+    }
+  });
+
+  private void addNotificationSocket(final TreeMap<Integer, Entry> entries,
+      FdSet fdSet) {
+    entries.put(notificationSockets[1].fd, 
+        new Entry(notificationSockets[1], new NotificationHandler()));
+    try {
+      notificationSockets[1].refCount.reference();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    fdSet.add(notificationSockets[1].fd);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(this + ": adding notificationSocket " +
+          notificationSockets[1].fd + ", connected to " +
+          notificationSockets[0].fd);
+    }
+  }
+
+  public String toString() {
+    return "DomainSocketWatcher(" + System.identityHashCode(this) + ")"; 
+  }
+
+  private static native int doPoll0(int maxWaitMs, FdSet readFds)
+      throws IOException;
+}
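
For illustration (not part of this change): a minimal sketch in the style of the accompanying TestDomainSocketWatcher. It is placed in the same package because Handler is package-private in this patch; the interrupt-check period is a placeholder.

    package org.apache.hadoop.net.unix;   // Handler is package-private here

    public class WatcherSketch {
      public static void main(String[] args) throws Exception {
        DomainSocketWatcher watcher = new DomainSocketWatcher(60000);  // check for interrupts every 60s
        DomainSocket pair[] = DomainSocket.socketpair();
        watcher.add(pair[1], new DomainSocketWatcher.Handler() {
          @Override
          public boolean handle(DomainSocket sock) {
            // Called when pair[1] becomes readable or its peer closes.
            // Returning true asks the watcher to close the socket for us.
            return true;
          }
        });
        pair[0].close();    // EOF on pair[1] triggers the handler
        watcher.close();
      }
    }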

+ 125 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CloseableReferenceCount.java

@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.util;
+
+import java.nio.channels.AsynchronousCloseException;
+import java.nio.channels.ClosedChannelException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A closeable object that maintains a reference count.
+ *
+ * Once the object is closed, attempting to take a new reference will throw
+ * ClosedChannelException.
+ */
+public class CloseableReferenceCount {
+  /**
+   * Bit mask representing a closed domain socket.
+   */
+  private static final int STATUS_CLOSED_MASK = 1 << 30;
+
+  /**
+   * The status bits.
+   *
+   * Bit 30: 0 = open, 1 = closed.
+   * Bits 29 to 0: the reference count.
+   */
+  private final AtomicInteger status = new AtomicInteger(0);
+
+  public CloseableReferenceCount() { }
+
+  /**
+   * Increment the reference count.
+   *
+   * @throws ClosedChannelException      If the status is closed.
+   */
+  public void reference() throws ClosedChannelException {
+    int curBits = status.incrementAndGet();
+    if ((curBits & STATUS_CLOSED_MASK) != 0) {
+      status.decrementAndGet();
+      throw new ClosedChannelException();
+    }
+  }
+
+  /**
+   * Decrement the reference count.
+   *
+   * @return          True if the object is closed and has no outstanding
+   *                  references.
+   */
+  public boolean unreference() {
+    int newVal = status.decrementAndGet();
+    Preconditions.checkState(newVal != 0xffffffff,
+        "called unreference when the reference count was already at 0.");
+    return newVal == STATUS_CLOSED_MASK;
+  }
+
+  /**
+   * Decrement the reference count, checking to make sure that the
+   * CloseableReferenceCount is not closed.
+   *
+   * @throws AsynchronousCloseException  If the status is closed.
+   */
+  public void unreferenceCheckClosed() throws ClosedChannelException {
+    int newVal = status.decrementAndGet();
+    if ((newVal & STATUS_CLOSED_MASK) != 0) {
+      throw new AsynchronousCloseException();
+    }
+  }
+
+  /**
+   * Return true if the status is currently open.
+   *
+   * @return                 True if the status is currently open.
+   */
+  public boolean isOpen() {
+    return ((status.get() & STATUS_CLOSED_MASK) == 0);
+  }
+
+  /**
+   * Mark the status as closed.
+   *
+   * Once the status is closed, it cannot be reopened.
+   *
+   * @return                         The current reference count.
+   * @throws ClosedChannelException  If someone else closes the object
+   *                                 before we do.
+   */
+  public int setClosed() throws ClosedChannelException {
+    while (true) {
+      int curBits = status.get();
+      if ((curBits & STATUS_CLOSED_MASK) != 0) {
+        throw new ClosedChannelException();
+      }
+      if (status.compareAndSet(curBits, curBits | STATUS_CLOSED_MASK)) {
+        return curBits & (~STATUS_CLOSED_MASK);
+      }
+    }
+  }
+
+  /**
+   * Get the current reference count.
+   *
+   * @return                 The current reference count.
+   */
+  public int getReferenceCount() {
+    return status.get() & (~STATUS_CLOSED_MASK);
+  }
+}
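
For illustration (not part of this change): the reference-counting pattern this class factors out of DomainSocket above, shown on a hypothetical resource. Take a reference before touching the underlying descriptor, release it in a finally block, and use setClosed() to fence off new references at close time.

    import java.io.IOException;
    import org.apache.hadoop.util.CloseableReferenceCount;

    class RefCountedResource {
      private final CloseableReferenceCount refCount = new CloseableReferenceCount();

      void doIo() throws IOException {
        refCount.reference();          // throws ClosedChannelException if already closed
        boolean exc = true;
        try {
          // ... touch the underlying file descriptor here ...
          exc = false;
        } finally {
          if (exc) {
            refCount.unreferenceCheckClosed();   // surface an asynchronous close
          } else {
            refCount.unreference();
          }
        }
      }

      void close() throws IOException {
        int remaining = refCount.setClosed();    // no new references after this
        // Wait for the 'remaining' outstanding references to drain, as
        // DomainSocket.close() does above, then release the resource.
      }
    }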

+ 38 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c

@@ -18,6 +18,7 @@
 
 #include "org_apache_hadoop.h"
 #include "org_apache_hadoop_io_nativeio_NativeIO.h"
+#include "org_apache_hadoop_io_nativeio_NativeIO_POSIX.h"
 
 #ifdef UNIX
 #include <assert.h>
@@ -49,6 +50,10 @@
 #include "file_descriptor.h"
 #include "errno_enum.h"
 
+#define MMAP_PROT_READ org_apache_hadoop_io_nativeio_NativeIO_POSIX_MMAP_PROT_READ
+#define MMAP_PROT_WRITE org_apache_hadoop_io_nativeio_NativeIO_POSIX_MMAP_PROT_WRITE
+#define MMAP_PROT_EXEC org_apache_hadoop_io_nativeio_NativeIO_POSIX_MMAP_PROT_EXEC
+
 // the NativeIO$POSIX$Stat inner class and its constructor
 static jclass stat_clazz;
 static jmethodID stat_ctor;
@@ -661,6 +666,39 @@ cleanup:
 #endif
 }
 
+JNIEXPORT jlong JNICALL 
+Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_mmap(
+  JNIEnv *env, jclass clazz, jobject jfd, jint jprot,
+  jboolean jshared, jlong length)
+{
+  void *addr = 0;
+  int prot, flags, fd;
+  
+  prot = ((jprot & MMAP_PROT_READ) ? PROT_READ : 0) |
+         ((jprot & MMAP_PROT_WRITE) ? PROT_WRITE : 0) |
+         ((jprot & MMAP_PROT_EXEC) ? PROT_EXEC : 0);
+  flags = (jshared == JNI_TRUE) ? MAP_SHARED : MAP_PRIVATE;
+  fd = fd_get(env, jfd);
+  addr = mmap(NULL, length, prot, flags, fd, 0);
+  if (addr == MAP_FAILED) {
+    throw_ioe(env, errno);
+  }
+  return (jlong)(intptr_t)addr;
+}
+
+JNIEXPORT void JNICALL 
+Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_munmap(
+  JNIEnv *env, jclass clazz, jlong jaddr, jlong length)
+{
+  void *addr;
+
+  addr = (void*)(intptr_t)jaddr;
+  if (munmap(addr, length) < 0) {
+    throw_ioe(env, errno);
+  }
+}
+
+
 /*
  * static native String getGroupName(int gid);
  *

+ 162 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/SharedFileDescriptorFactory.c

@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "org_apache_hadoop.h"
+
+#ifdef UNIX
+
+#include "exception.h"
+#include "file_descriptor.h"
+#include "org_apache_hadoop.h"
+#include "org_apache_hadoop_io_nativeio_SharedFileDescriptorFactory.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static pthread_mutex_t g_rand_lock = PTHREAD_MUTEX_INITIALIZER;
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_io_nativeio_SharedFileDescriptorFactory_deleteStaleTemporaryFiles0(
+  JNIEnv *env, jclass clazz, jstring jprefix, jstring jpath)
+{
+  const char *prefix = NULL, *path = NULL;
+  char target[PATH_MAX];
+  jthrowable jthr;
+  DIR *dp = NULL;
+  struct dirent *de;
+
+  prefix = (*env)->GetStringUTFChars(env, jprefix, NULL);
+  if (!prefix) goto done; // exception raised
+  path = (*env)->GetStringUTFChars(env, jpath, NULL);
+  if (!path) goto done; // exception raised
+
+  dp = opendir(path);
+  if (!dp) {
+    int ret = errno;
+    jthr = newIOException(env, "opendir(%s) error %d: %s",
+                          path, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  while ((de = readdir(dp))) {
+    if (strncmp(prefix, de->d_name, strlen(prefix)) == 0) {
+      int ret = snprintf(target, PATH_MAX, "%s/%s", path, de->d_name);
+      if ((0 < ret) && (ret < PATH_MAX)) {
+        unlink(target);
+      }
+    }
+  }
+
+done:
+  if (dp) {
+    closedir(dp);
+  }
+  if (prefix) {
+    (*env)->ReleaseStringUTFChars(env, jprefix, prefix);
+  }
+  if (path) {
+    (*env)->ReleaseStringUTFChars(env, jpath, path);
+  }
+}
+
+JNIEXPORT jobject JNICALL
+Java_org_apache_hadoop_io_nativeio_SharedFileDescriptorFactory_createDescriptor0(
+  JNIEnv *env, jclass clazz, jstring jprefix, jstring jpath, jint length)
+{
+  const char *prefix = NULL, *path = NULL;
+  char target[PATH_MAX];
+  int ret, fd = -1, rnd;
+  jthrowable jthr;
+  jobject jret = NULL;
+
+  prefix = (*env)->GetStringUTFChars(env, jprefix, NULL);
+  if (!prefix) goto done; // exception raised
+  path = (*env)->GetStringUTFChars(env, jpath, NULL);
+  if (!path) goto done; // exception raised
+
+  while (1) {
+    // Pick a fresh random suffix on each attempt so an EEXIST collision
+    // (e.g. a stale file left by a crashed process) cannot retry the same
+    // name forever.
+    pthread_mutex_lock(&g_rand_lock);
+    rnd = rand();
+    pthread_mutex_unlock(&g_rand_lock);
+    ret = snprintf(target, PATH_MAX, "%s/%s_%d",
+                   path, prefix, rnd);
+    if (ret < 0) {
+      jthr = newIOException(env, "snprintf error");
+      (*env)->Throw(env, jthr);
+      goto done;
+    } else if (ret >= PATH_MAX) {
+      jthr = newIOException(env, "computed path was too long.");
+      (*env)->Throw(env, jthr);
+      goto done;
+    }
+    fd = open(target, O_CREAT | O_EXCL | O_RDWR, 0700);
+    if (fd >= 0) break; // success
+    ret = errno;
+    if (ret == EEXIST) {
+      // Bad luck -- we got a very rare collision here between us and 
+      // another DataNode (or process).  Try again.
+      continue;
+    } else if (ret == EINTR) {
+      // Most of the time, this error is only possible when opening FIFOs.
+      // But let's be thorough.
+      continue;
+    }
+    jthr = newIOException(env, "open(%s, O_CREAT | O_EXCL | O_RDWR) "
+            "failed: error %d (%s)", target, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  if (unlink(target) < 0) {
+    ret = errno;
+    jthr = newIOException(env, "unlink(%s) failed: error %d (%s)",
+                          target, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  if (ftruncate(fd, length) < 0) {
+    ret = errno;
+    jthr = newIOException(env, "ftruncate(%s, %d) failed: error %d (%s)",
+                          target, length, ret, terror(ret));
+    (*env)->Throw(env, jthr);
+    goto done;
+  }
+  jret = fd_create(env, fd); // throws exception on error.
+
+done:
+  if (prefix) {
+    (*env)->ReleaseStringUTFChars(env, jprefix, prefix);
+  }
+  if (path) {
+    (*env)->ReleaseStringUTFChars(env, jpath, path);
+  }
+  if (!jret) {
+    if (fd >= 0) {
+      close(fd);
+    }
+  }
+  return jret;
+}
+
+#endif
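
For orientation, here is a minimal sketch of driving this native code through the Java-side factory; it mirrors TestSharedFileDescriptorFactory further down (imports as in that test), and the directory and sizes are arbitrary examples:

    // Sketch: createDescriptor0 creates, unlinks, and truncates the backing
    // file, so the returned stream wraps an anonymous shared-memory descriptor.
    File dir = new File("/tmp/shm-example");          // example path; must exist
    dir.mkdirs();
    SharedFileDescriptorFactory factory =
        new SharedFileDescriptorFactory("shm_", dir.getAbsolutePath());
    FileInputStream in = factory.createDescriptor(4096);
    FileOutputStream out = new FileOutputStream(in.getFD());
    out.write(42);                        // write through the shared descriptor
    in.getChannel().position(0);
    assert in.read() == 42;               // the same bytes are visible for reading
    out.close();
    in.close();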

+ 247 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocketWatcher.c

@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "config.h"
+#include "exception.h"
+#include "org_apache_hadoop.h"
+#include "org_apache_hadoop_net_unix_DomainSocketWatcher.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <jni.h>
+#include <poll.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static jfieldID fd_set_data_fid;
+
+#define FD_SET_DATA_MIN_SIZE 2
+
+struct fd_set_data {
+  /**
+   * Number of fds we have allocated space for.
+   */
+  int alloc_size;
+
+  /**
+   * Number of fds actually in use.
+   */
+  int used_size;
+
+  /**
+   * Beginning of pollfd data.
+   */
+  struct pollfd pollfd[0];
+};
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_anchorNative(
+JNIEnv *env, jclass clazz)
+{
+  jclass fd_set_class;
+
+  fd_set_class = (*env)->FindClass(env,
+          "org/apache/hadoop/net/unix/DomainSocketWatcher$FdSet");
+  if (!fd_set_class) return; // exception raised
+  fd_set_data_fid = (*env)->GetFieldID(env, fd_set_class, "data", "J");
+  if (!fd_set_data_fid) return; // exception raised
+}
+
+JNIEXPORT jlong JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_alloc0(
+JNIEnv *env, jclass clazz)
+{
+  struct fd_set_data *sd;
+
+  sd = calloc(1, sizeof(struct fd_set_data) +
+              (sizeof(struct pollfd) * FD_SET_DATA_MIN_SIZE));
+  if (!sd) {
+    (*env)->Throw(env, newRuntimeException(env, "out of memory allocating "
+                                            "DomainSocketWatcher#FdSet"));
+    return 0L;
+  }
+  sd->alloc_size = FD_SET_DATA_MIN_SIZE;
+  sd->used_size = 0;
+  return (jlong)(intptr_t)sd;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_add(
+JNIEnv *env, jobject obj, jint fd)
+{
+  struct fd_set_data *sd, *nd;
+  struct pollfd *pollfd;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+    GetLongField(env, obj, fd_set_data_fid);
+  if (sd->used_size + 1 > sd->alloc_size) {
+    nd = realloc(sd, sizeof(struct fd_set_data) +
+            (sizeof(struct pollfd) * sd->alloc_size * 2));
+    if (!nd) {
+      (*env)->Throw(env, newRuntimeException(env, "out of memory adding "
+            "another fd to DomainSocketWatcher#FdSet.  we have %d already",
+            sd->alloc_size));
+      return;
+    }
+    nd->alloc_size = nd->alloc_size * 2;
+    (*env)->SetLongField(env, obj, fd_set_data_fid, (jlong)(intptr_t)nd);
+    sd = nd;
+  }
+  pollfd = &sd->pollfd[sd->used_size];
+  sd->used_size++;
+  pollfd->fd = fd;
+  pollfd->events = POLLIN;
+  pollfd->revents = 0;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_remove(
+JNIEnv *env, jobject obj, jint fd)
+{
+  struct fd_set_data *sd;
+  struct pollfd *pollfd, *last_pollfd;
+  int used_size, i;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, obj, fd_set_data_fid);
+  used_size = sd->used_size;
+  for (i = 0; i < used_size; i++) {
+    pollfd = sd->pollfd + i;
+    if (pollfd->fd == fd) break;
+  }
+  if (i == used_size) {
+    (*env)->Throw(env, newRuntimeException(env, "failed to remove fd %d "
+          "from the FdSet because it was never present.", fd));
+    return;
+  }
+  last_pollfd = sd->pollfd + (used_size - 1);
+  if (used_size > 1) {
+    // Move last pollfd to the new empty slot if needed
+    pollfd->fd = last_pollfd->fd;
+    pollfd->events = last_pollfd->events;
+    pollfd->revents = last_pollfd->revents;
+  }
+  memset(last_pollfd, 0, sizeof(struct pollfd));
+  sd->used_size--;
+}
+
+JNIEXPORT jobject JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_getAndClearReadableFds(
+JNIEnv *env, jobject obj)
+{
+  int *carr = NULL;
+  jobject jarr = NULL;
+  struct fd_set_data *sd;
+  int used_size, num_readable = 0, i, j;
+  jthrowable jthr = NULL;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, obj, fd_set_data_fid);
+  used_size = sd->used_size;
+  for (i = 0; i < used_size; i++) {
+    if (sd->pollfd[i].revents & POLLIN) {
+      num_readable++;
+    } else {
+      sd->pollfd[i].revents = 0;
+    }
+  }
+  if (num_readable > 0) {
+    carr = malloc(sizeof(int) * num_readable);
+    if (!carr) {
+      jthr = newRuntimeException(env, "failed to allocate a temporary array "
+            "of %d ints", num_readable);
+      goto done;
+    }
+    j = 0;
+    for (i = 0; ((i < used_size) && (j < num_readable)); i++) {
+      if (sd->pollfd[i].revents & POLLIN) {
+        carr[j] = sd->pollfd[i].fd;
+        j++;
+        sd->pollfd[i].revents = 0;
+      }
+    }
+    if (j != num_readable) {
+      jthr = newRuntimeException(env, "failed to fill entire carr "
+            "array of size %d: only filled %d elements", num_readable, j);
+      goto done;
+    }
+  }
+  jarr = (*env)->NewIntArray(env, num_readable);
+  if (!jarr) {
+    jthr = (*env)->ExceptionOccurred(env);
+    (*env)->ExceptionClear(env);
+    goto done;
+  }
+  if (num_readable > 0) {
+    (*env)->SetIntArrayRegion(env, jarr, 0, num_readable, carr);
+    jthr = (*env)->ExceptionOccurred(env);
+    if (jthr) {
+      (*env)->ExceptionClear(env);
+      goto done;
+    }
+  }
+
+done:
+  free(carr);
+  if (jthr) {
+    (*env)->DeleteLocalRef(env, jarr);
+    jarr = NULL;
+  }
+  return jarr;
+}
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_00024FdSet_close(
+JNIEnv *env, jobject obj)
+{
+  struct fd_set_data *sd;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, obj, fd_set_data_fid);
+  if (sd) {
+    free(sd);
+    (*env)->SetLongField(env, obj, fd_set_data_fid, 0L);
+  }
+}
+
+JNIEXPORT jint JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocketWatcher_doPoll0(
+JNIEnv *env, jclass clazz, jint checkMs, jobject fdSet)
+{
+  struct fd_set_data *sd;
+  int ret, err;
+
+  sd = (struct fd_set_data*)(intptr_t)(*env)->
+      GetLongField(env, fdSet, fd_set_data_fid);
+  ret = poll(sd->pollfd, sd->used_size, checkMs);
+  if (ret >= 0) {
+    return ret;
+  }
+  err = errno;
+  if (err != EINTR) { // treat EINTR as 0 fds ready
+    (*env)->Throw(env, newIOException(env,
+            "poll(2) failed with error code %d: %s", err, terror(err)));
+  }
+  return 0;
+}
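
These natives back the new DomainSocketWatcher class added on the Java side of this patch. A minimal notification sketch, assuming as in the tests below that the constructor argument is the interrupt-check interval in milliseconds:

    DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
    DomainSocket pair[] = DomainSocket.socketpair();
    watcher.add(pair[1], new DomainSocketWatcher.Handler() {
      @Override
      public boolean handle(DomainSocket sock) {
        // Invoked once pair[1] polls readable (POLLIN above); returning true
        // appears to tell the watcher it is done with this socket, as both
        // tests below do.
        return true;
      }
    });
    pair[0].close();   // closing the peer makes pair[1] readable, firing the handler
    watcher.close();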

+ 82 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestSharedFileDescriptorFactory.java

@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.nativeio;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+
+import org.junit.Assert;
+import org.junit.Assume;
+import org.junit.Test;
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+
+public class TestSharedFileDescriptorFactory {
+  static final Log LOG = LogFactory.getLog(TestSharedFileDescriptorFactory.class);
+
+  private static final File TEST_BASE =
+      new File(System.getProperty("test.build.data", "/tmp"));
+
+  @Test(timeout=10000)
+  public void testReadAndWrite() throws Exception {
+    Assume.assumeTrue(NativeIO.isAvailable());
+    Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
+    File path = new File(TEST_BASE, "testReadAndWrite");
+    path.mkdirs();
+    SharedFileDescriptorFactory factory =
+        new SharedFileDescriptorFactory("woot_", path.getAbsolutePath());
+    FileInputStream inStream = factory.createDescriptor(4096);
+    FileOutputStream outStream = new FileOutputStream(inStream.getFD());
+    outStream.write(101);
+    inStream.getChannel().position(0);
+    Assert.assertEquals(101, inStream.read());
+    inStream.close();
+    outStream.close();
+    FileUtil.fullyDelete(path);
+  }
+
+  static private void createTempFile(String path) throws Exception {
+    FileOutputStream fos = new FileOutputStream(path);
+    fos.write(101);
+    fos.close();
+  }
+  
+  @Test(timeout=10000)
+  public void testCleanupRemainders() throws Exception {
+    Assume.assumeTrue(NativeIO.isAvailable());
+    Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
+    File path = new File(TEST_BASE, "testCleanupRemainders");
+    path.mkdirs();
+    String remainder1 = path.getAbsolutePath() + 
+        Path.SEPARATOR + "woot2_remainder1";
+    String remainder2 = path.getAbsolutePath() +
+        Path.SEPARATOR + "woot2_remainder2";
+    createTempFile(remainder1);
+    createTempFile(remainder2);
+    new SharedFileDescriptorFactory("woot2_", path.getAbsolutePath());
+    // creating the SharedFileDescriptorFactory should have removed 
+    // the remainders
+    Assert.assertFalse(new File(remainder1).exists());
+    Assert.assertFalse(new File(remainder2).exists());
+    FileUtil.fullyDelete(path);
+  }
+}

+ 150 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocketWatcher.java

@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.net.unix;
+
+import java.util.ArrayList;
+import java.util.Random;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.util.concurrent.Uninterruptibles;
+
+public class TestDomainSocketWatcher {
+  static final Log LOG = LogFactory.getLog(TestDomainSocketWatcher.class);
+
+  @Before
+  public void before() {
+    Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null);
+  }
+
+  /**
+   * Test that we can create a DomainSocketWatcher and then shut it down.
+   */
+  @Test(timeout=60000)
+  public void testCreateShutdown() throws Exception {
+    DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
+    watcher.close();
+  }
+
+  /**
+   * Test that we can get notifications out of a DomainSocketWatcher.
+   */
+  @Test(timeout=180000)
+  public void testDeliverNotifications() throws Exception {
+    DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
+    DomainSocket pair[] = DomainSocket.socketpair();
+    final CountDownLatch latch = new CountDownLatch(1);
+    watcher.add(pair[1], new DomainSocketWatcher.Handler() {
+      @Override
+      public boolean handle(DomainSocket sock) {
+        latch.countDown();
+        return true;
+      }
+    });
+    pair[0].close();
+    latch.await();
+    watcher.close();
+  }
+
+  /**
+   * Test that a Java interrupt can stop the watcher thread.
+   */
+  @Test(timeout=60000)
+  public void testInterruption() throws Exception {
+    DomainSocketWatcher watcher = new DomainSocketWatcher(10);
+    watcher.interrupt();
+    Uninterruptibles.joinUninterruptibly(watcher);
+  }
+  
+  @Test(timeout=300000)
+  public void testStress() throws Exception {
+    final int SOCKET_NUM = 250;
+    final ReentrantLock lock = new ReentrantLock();
+    final DomainSocketWatcher watcher = new DomainSocketWatcher(10000000);
+    final ArrayList<DomainSocket[]> pairs = new ArrayList<DomainSocket[]>();
+    final AtomicInteger handled = new AtomicInteger(0);
+
+    final Thread adderThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        try {
+          for (int i = 0; i < SOCKET_NUM; i++) {
+            DomainSocket pair[] = DomainSocket.socketpair();
+            watcher.add(pair[1], new DomainSocketWatcher.Handler() {
+              @Override
+              public boolean handle(DomainSocket sock) {
+                handled.incrementAndGet();
+                return true;
+              }
+            });
+            lock.lock();
+            try {
+              pairs.add(pair);
+            } finally {
+              lock.unlock();
+            }
+          }
+        } catch (Throwable e) {
+          LOG.error(e);
+          throw new RuntimeException(e);
+        }
+      }
+    });
+    
+    final Thread removerThread = new Thread(new Runnable() {
+      @Override
+      public void run() {
+        final Random random = new Random();
+        try {
+          while (handled.get() != SOCKET_NUM) {
+            lock.lock();
+            try {
+              if (!pairs.isEmpty()) {
+                int idx = random.nextInt(pairs.size());
+                DomainSocket pair[] = pairs.remove(idx);
+                if (random.nextBoolean()) {
+                  pair[0].close();
+                } else {
+                  watcher.remove(pair[1]);
+                }
+              }
+            } finally {
+              lock.unlock();
+            }
+          }
+        } catch (Throwable e) {
+          LOG.error(e);
+          throw new RuntimeException(e);
+        }
+      }
+    });
+
+    adderThread.start();
+    removerThread.start();
+    Uninterruptibles.joinUninterruptibly(adderThread);
+    Uninterruptibles.joinUninterruptibly(removerThread);
+    watcher.close();
+  }
+}

+ 26 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java

@@ -26,6 +26,7 @@ import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 
+import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -163,8 +164,9 @@ class DFSClientCache {
     return new CacheLoader<String, DFSClient>() {
       @Override
       public DFSClient load(String userName) throws Exception {
-        UserGroupInformation ugi = UserGroupInformation
-            .createRemoteUser(userName);
+        UserGroupInformation ugi = getUserGroupInformation(
+                userName,
+                UserGroupInformation.getCurrentUser());
 
         // Guava requires CacheLoader never returns null.
         return ugi.doAs(new PrivilegedExceptionAction<DFSClient>() {
@@ -177,6 +179,28 @@ class DFSClientCache {
     };
   }
 
+  /**
+   * Create a proxy UserGroupInformation for the given effective user,
+   * proxied by the given real user.
+   * @param effectiveUser The user who is being proxied by the real user
+   * @param realUser The actual user who issues the command
+   * @return Proxy UserGroupInformation
+   * @throws IOException If proxying fails
+   */
+  UserGroupInformation getUserGroupInformation(
+          String effectiveUser,
+          UserGroupInformation realUser)
+          throws IOException {
+    Preconditions.checkNotNull(effectiveUser);
+    Preconditions.checkNotNull(realUser);
+    UserGroupInformation ugi =
+            UserGroupInformation.createProxyUser(effectiveUser, realUser);
+    if (LOG.isDebugEnabled()){
+      LOG.debug(String.format("Created ugi:" +
+              " %s for username: %s", ugi, effectiveUser));
+    }
+    return ugi;
+  }
+
   private RemovalListener<String, DFSClient> clientRemovalListener() {
     return new RemovalListener<String, DFSClient>() {
       @Override

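Because the gateway now talks to HDFS through a per-user proxy UGI instead of a superuser client, the user the gateway runs as has to be authorized to impersonate NFS users. A sketch of the programmatic setup the new tests use (the same calls TestReaddir and TestWrites make below); in a real deployment the wildcards would normally be narrowed to specific groups and hosts:

    Configuration config = new Configuration();
    String gatewayUser = System.getProperty("user.name");
    // Authorize gatewayUser to impersonate users from any group and any host.
    config.set(ProxyUsers.getProxySuperuserGroupConfKey(gatewayUser), "*");
    config.set(ProxyUsers.getProxySuperuserIpConfKey(gatewayUser), "*");
    ProxyUsers.refreshSuperUserGroupsConfiguration(config);
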
+ 7 - 5
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java

@@ -479,9 +479,9 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
     } 
 
     try {
-      // Use superUserClient to get file attr since we don't know whether the
-      // NFS client user has access permission to the file
-      attrs = writeManager.getFileAttr(superUserClient, handle, iug);
+      // HDFS-5804 removed supserUserClient access
+      attrs = writeManager.getFileAttr(dfsClient, handle, iug);
+
       if (attrs == null) {
         LOG.error("Can't get path for fileId:" + handle.getFileId());
         return new ACCESS3Response(Nfs3Status.NFS3ERR_STALE);
@@ -603,8 +603,10 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
       // Only do access check.
       try {
         // Don't read from cache. Client may not have read permission.
-        attrs = Nfs3Utils.getFileAttr(superUserClient,
-            Nfs3Utils.getFileIdPath(handle), iug);
+        attrs = Nfs3Utils.getFileAttr(
+                  dfsClient,
+                  Nfs3Utils.getFileIdPath(handle),
+                  iug);
       } catch (IOException e) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Get error accessing file, fileId:" + handle.getFileId());

+ 13 - 1
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/TestReaddir.java

@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.net.InetAddress;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -40,6 +41,9 @@ import org.apache.hadoop.nfs.nfs3.response.READDIRPLUS3Response;
 import org.apache.hadoop.nfs.nfs3.response.READDIRPLUS3Response.EntryPlus3;
 import org.apache.hadoop.oncrpc.XDR;
 import org.apache.hadoop.oncrpc.security.SecurityHandler;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authorize.ProxyUsers;
+import org.apache.hadoop.util.StringUtils;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -58,9 +62,17 @@ public class TestReaddir {
   static RpcProgramNfs3 nfsd;
   static String testdir = "/tmp";
   static SecurityHandler securityHandler;
-  
+
   @BeforeClass
   public static void setup() throws Exception {
+    String currentUser = System.getProperty("user.name");
+    config.set(
+            ProxyUsers.getProxySuperuserGroupConfKey(currentUser),
+            "*");
+    config.set(
+            ProxyUsers.getProxySuperuserIpConfKey(currentUser),
+            "*");
+    ProxyUsers.refreshSuperUserGroupsConfiguration(config);
     cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
     cluster.waitActive();
     hdfs = cluster.getFileSystem();

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestDFSClientCache.java

@@ -20,12 +20,15 @@ package org.apache.hadoop.hdfs.nfs.nfs3;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertThat;
+import static org.hamcrest.core.Is.is;
 
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.junit.Test;
 
 public class TestDFSClientCache {
@@ -49,6 +52,28 @@ public class TestDFSClientCache {
     assertEquals(MAX_CACHE_SIZE - 1, cache.clientCache.size());
   }
 
+  @Test
+  public void testGetUserGroupInformation() throws IOException {
+    String userName = "user1";
+    String currentUser = "currentUser";
+
+    UserGroupInformation currentUserUgi = UserGroupInformation
+            .createUserForTesting(currentUser, new String[0]);
+    currentUserUgi.setAuthenticationMethod(
+            UserGroupInformation.AuthenticationMethod.KERBEROS);
+    Configuration conf = new Configuration();
+    conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
+    DFSClientCache cache = new DFSClientCache(conf);
+    UserGroupInformation ugiResult
+            = cache.getUserGroupInformation(userName, currentUserUgi);
+
+    assertThat(ugiResult.getUserName(), is(userName));
+    assertThat(ugiResult.getRealUser(), is(currentUserUgi));
+    assertThat(
+            ugiResult.getAuthenticationMethod(),
+            is(UserGroupInformation.AuthenticationMethod.PROXY));
+  }
+
   private static boolean isDfsClientClose(DFSClient c) {
     try {
       c.exists("");

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java

@@ -50,6 +50,7 @@ import org.apache.hadoop.nfs.nfs3.response.CREATE3Response;
 import org.apache.hadoop.nfs.nfs3.response.READ3Response;
 import org.apache.hadoop.oncrpc.XDR;
 import org.apache.hadoop.oncrpc.security.SecurityHandler;
+import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.jboss.netty.channel.Channel;
 import org.junit.Assert;
 import org.junit.Test;
@@ -285,6 +286,14 @@ public class TestWrites {
     SecurityHandler securityHandler = Mockito.mock(SecurityHandler.class);
     Mockito.when(securityHandler.getUser()).thenReturn(
         System.getProperty("user.name"));
+    String currentUser = System.getProperty("user.name");
+    config.set(
+            ProxyUsers.getProxySuperuserGroupConfKey(currentUser),
+            "*");
+    config.set(
+            ProxyUsers.getProxySuperuserIpConfKey(currentUser),
+            "*");
+    ProxyUsers.refreshSuperUserGroupsConfiguration(config);
 
     try {
       cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();

+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -297,6 +297,17 @@ Release 2.4.0 - UNRELEASED
     HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and 
     the corresponding byte value. (jing9)
 
+    HDFS-5153. Datanode should send block reports for each storage in a
+    separate message. (Arpit Agarwal)
+
+    HDFS-5804. HDFS NFS Gateway fails to mount and proxy when using Kerberos.
+    (Abin Shahab via jing9)
+
+    HDFS-5859. DataNode#checkBlockToken should check block tokens even if
+    security is not enabled. (cmccabe)
+
+    HDFS-5746.  Add ShortCircuitSharedMemorySegment (cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
@@ -310,6 +321,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-5843. DFSClient.getFileChecksum() throws IOException if checksum is 
     disabled. (Laurent Goujon via jing9)
 
+    HDFS-5856. DataNode.checkDiskError might throw NPE.
+    (Josh Elser via suresh)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -399,6 +399,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long    DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT = 60 * 60 * 1000;
   public static final String  DFS_BLOCKREPORT_INITIAL_DELAY_KEY = "dfs.blockreport.initialDelay";
   public static final int     DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT = 0;
+  public static final String  DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY = "dfs.blockreport.split.threshold";
+  public static final long    DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT = 1000 * 1000;
   public static final String  DFS_CACHEREPORT_INTERVAL_MSEC_KEY = "dfs.cachereport.intervalMsec";
   public static final long    DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT = 10 * 1000;
   public static final String  DFS_BLOCK_INVALIDATE_LIMIT_KEY = "dfs.block.invalidate.limit";

+ 302 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/ShortCircuitSharedMemorySegment.java

@@ -0,0 +1,302 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.client;
+
+import java.io.Closeable;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.apache.hadoop.io.nativeio.NativeIO.POSIX;
+import org.apache.hadoop.util.CloseableReferenceCount;
+import org.apache.hadoop.util.Shell;
+
+import com.google.common.base.Preconditions;
+import com.google.common.primitives.Ints;
+
+import sun.misc.Unsafe;
+
+public class ShortCircuitSharedMemorySegment implements Closeable {
+  private static final Log LOG =
+    LogFactory.getLog(ShortCircuitSharedMemorySegment.class);
+
+  private static final int BYTES_PER_SLOT = 64;
+
+  private static final Unsafe unsafe;
+
+  static {
+    Unsafe theUnsafe = null;
+    try {
+      Field f = Unsafe.class.getDeclaredField("theUnsafe");
+      f.setAccessible(true);
+      theUnsafe = (Unsafe)f.get(null);
+    } catch (Throwable e) {
+      LOG.error("failed to load misc.Unsafe", e);
+    }
+    unsafe = theUnsafe;
+  }
+
+  /**
+   * A slot containing information about a replica.
+   *
+   * The format is:
+   * word 0
+   *   bit 0:32   Slot flags (see below).
+   *   bit 33:63  Anchor count.
+   * word 1:7
+   *   Reserved for future use, such as statistics.
+   *   Padding is also useful for avoiding false sharing.
+   *
+   * Little-endian versus big-endian is not relevant here since both the client
+   * and the server reside on the same machine and therefore see the same
+   * byte order.
+   */
+  public class Slot implements Closeable {
+    /**
+     * Flag indicating that the slot is in use.
+     */
+    private static final long SLOT_IN_USE_FLAG =    1L<<63;
+
+    /**
+     * Flag indicating that the slot can be anchored.
+     */
+    private static final long ANCHORABLE_FLAG =     1L<<62;
+
+    private long slotAddress;
+
+    Slot(long slotAddress) {
+      this.slotAddress = slotAddress;
+    }
+
+    /**
+     * Make a given slot anchorable.
+     */
+    public void makeAnchorable() {
+      Preconditions.checkState(slotAddress != 0,
+          "Called makeAnchorable on a slot that was closed.");
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        if ((prev & ANCHORABLE_FLAG) != 0) {
+          return;
+        }
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev | ANCHORABLE_FLAG));
+    }
+
+    /**
+     * Make a given slot unanchorable.
+     */
+    public void makeUnanchorable() {
+      Preconditions.checkState(slotAddress != 0,
+          "Called makeUnanchorable on a slot that was closed.");
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        if ((prev & ANCHORABLE_FLAG) == 0) {
+          return;
+        }
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev & (~ANCHORABLE_FLAG)));
+    }
+
+    /**
+     * Try to add an anchor for a given slot.
+     *
+     * When a slot is anchored, we know that the block it refers to is resident
+     * in memory.
+     *
+     * @return          True if the slot is anchored.
+     */
+    public boolean addAnchor() {
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        if ((prev & 0x7fffffff) == 0x7fffffff) {
+          // Too many other threads have anchored the slot (2 billion?)
+          return false;
+        }
+        if ((prev & ANCHORABLE_FLAG) == 0) {
+          // Slot can't be anchored right now.
+          return false;
+        }
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev + 1));
+      return true;
+    }
+
+    /**
+     * Remove an anchor for a given slot.
+     */
+    public void removeAnchor() {
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        Preconditions.checkState((prev & 0x7fffffff) != 0,
+            "Tried to remove anchor for slot " + slotAddress +", which was " +
+            "not anchored.");
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, prev - 1));
+    }
+
+    /**
+     * @return      The index of this slot.
+     */
+    public int getIndex() {
+      Preconditions.checkState(slotAddress != 0);
+      return Ints.checkedCast(
+          (slotAddress - baseAddress) / BYTES_PER_SLOT);
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (slotAddress == 0) return;
+      long prev;
+      do {
+        prev = unsafe.getLongVolatile(null, this.slotAddress);
+        Preconditions.checkState((prev & SLOT_IN_USE_FLAG) != 0,
+            "tried to close slot that wasn't open");
+      } while (!unsafe.compareAndSwapLong(null, this.slotAddress,
+                  prev, 0));
+      slotAddress = 0;
+      if (ShortCircuitSharedMemorySegment.this.refCount.unreference()) {
+        ShortCircuitSharedMemorySegment.this.free();
+      }
+    }
+  }
+
+  /**
+   * The stream that we're going to use to create this shared memory segment.
+   *
+   * Although this is a FileInputStream, we are going to assume that the
+   * underlying file descriptor is writable as well as readable.
+   * It would be more appropriate to use a RandomAccessFile here, but that class
+   * cannot be constructed from an existing FileDescriptor, unlike
+   * FileInputStream.
+   */
+  private final FileInputStream stream;
+
+  /**
+   * Length of the shared memory segment.
+   */
+  private final int length;
+
+  /**
+   * The base address of the memory-mapped file.
+   */
+  private final long baseAddress;
+
+  /**
+   * Reference count and 'closed' status.
+   */
+  private final CloseableReferenceCount refCount = new CloseableReferenceCount();
+
+  public ShortCircuitSharedMemorySegment(FileInputStream stream)
+        throws IOException {
+    if (!NativeIO.isAvailable()) {
+      throw new UnsupportedOperationException("NativeIO is not available.");
+    }
+    if (Shell.WINDOWS) {
+      throw new UnsupportedOperationException(
+          "ShortCircuitSharedMemorySegment is not yet implemented " +
+          "for Windows.");
+    }
+    if (unsafe == null) {
+      throw new UnsupportedOperationException(
+          "can't use ShortCircuitSharedMemorySegment because we failed to " +
+          "load misc.Unsafe.");
+    }
+    this.refCount.reference();
+    this.stream = stream;
+    this.length = getEffectiveLength(stream);
+    this.baseAddress = POSIX.mmap(this.stream.getFD(), 
+      POSIX.MMAP_PROT_READ | POSIX.MMAP_PROT_WRITE, true, this.length);
+  }
+
+  /**
+   * Calculate the effective usable size of the shared memory segment.
+   * We round down to a multiple of the slot size and do some validation.
+   *
+   * @param stream The stream we're using.
+   * @return       The effective usable size of the shared memory segment.
+   */
+  private static int getEffectiveLength(FileInputStream stream)
+      throws IOException {
+    int intSize = Ints.checkedCast(stream.getChannel().size());
+    int slots = intSize / BYTES_PER_SLOT;
+    Preconditions.checkState(slots > 0, "size of shared memory segment was " +
+        intSize + ", but that is not enough to hold even one slot.");
+    return slots * BYTES_PER_SLOT;
+  }
+
+  private boolean allocateSlot(long address) {
+    long prev;
+    do {
+      prev = unsafe.getLongVolatile(null, address);
+      if ((prev & Slot.SLOT_IN_USE_FLAG) != 0) {
+        return false;
+      }
+    } while (!unsafe.compareAndSwapLong(null, address,
+                prev, prev | Slot.SLOT_IN_USE_FLAG));
+    return true;
+  }
+
+  /**
+   * Allocate a new Slot in this shared memory segment.
+   *
+   * @return        A newly allocated Slot, or null if there were no available
+   *                slots.
+   */
+  public Slot allocateNextSlot() throws IOException {
+    ShortCircuitSharedMemorySegment.this.refCount.reference();
+    Slot slot = null;
+    try {
+      final int numSlots = length / BYTES_PER_SLOT;
+      for (int i = 0; i < numSlots; i++) {
+        long address = this.baseAddress + (i * BYTES_PER_SLOT);
+        if (allocateSlot(address)) {
+          slot = new Slot(address);
+          break;
+        }
+      }
+    } finally {
+      if (slot == null) {
+        if (refCount.unreference()) {
+          free();
+        }
+      }
+    }
+    return slot;
+  }
+
+  @Override
+  public void close() throws IOException {
+    refCount.setClosed();
+    if (refCount.unreference()) {
+      free();
+    }
+  }
+
+  void free() throws IOException {
+    IOUtils.cleanup(LOG, stream);
+    POSIX.munmap(baseAddress, length);
+  }
+}
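
Pieced together from this class and TestShortCircuitSharedMemorySegment further down, a rough sketch of the intended lifecycle, reusing the SharedFileDescriptorFactory from the earlier sketch (paths and sizes are arbitrary examples):

    FileInputStream stream = factory.createDescriptor(4096);   // 64 slots of 64 bytes
    ShortCircuitSharedMemorySegment shm =
        new ShortCircuitSharedMemorySegment(stream);            // mmaps the descriptor
    Slot slot = shm.allocateNextSlot();     // CAS-sets SLOT_IN_USE_FLAG; null when full
    slot.makeAnchorable();
    if (slot.addAnchor()) {                 // block is resident in memory while anchored
      // ... serve short-circuit reads ...
      slot.removeAnchor();
    }
    slot.close();                           // clears the slot word
    shm.close();                            // munmap happens when the last reference drops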

+ 21 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1635,15 +1635,19 @@ public class BlockManager {
   /**
    * The given storage is reporting all its blocks.
    * Update the (storage-->block list) and (block-->storage list) maps.
+   *
+   * @return true if all known storages of the given DN have finished reporting.
+   * @throws IOException
    */
-  public void processReport(final DatanodeID nodeID,
+  public boolean processReport(final DatanodeID nodeID,
       final DatanodeStorage storage, final String poolId,
       final BlockListAsLongs newReport) throws IOException {
     namesystem.writeLock();
     final long startTime = Time.now(); //after acquiring write lock
     final long endTime;
+    DatanodeDescriptor node;
     try {
-      final DatanodeDescriptor node = datanodeManager.getDatanode(nodeID);
+      node = datanodeManager.getDatanode(nodeID);
       if (node == null || !node.isAlive) {
         throw new IOException(
             "ProcessReport from dead or unregistered node: " + nodeID);
@@ -1651,13 +1655,21 @@ public class BlockManager {
 
       // To minimize startup time, we discard any second (or later) block reports
       // that we receive while still in startup phase.
-      final DatanodeStorageInfo storageInfo = node.updateStorage(storage);
+      DatanodeStorageInfo storageInfo = node.getStorageInfo(storage.getStorageID());
+
+      if (storageInfo == null) {
+        // We handle this for backwards compatibility.
+        storageInfo = node.updateStorage(storage);
+        LOG.warn("Unknown storageId " + storage.getStorageID() +
+                    ", updating storageMap. This indicates a buggy " +
+                    "DataNode that isn't heartbeating correctly.");
+      }
       if (namesystem.isInStartupSafeMode()
           && storageInfo.getBlockReportCount() > 0) {
         blockLog.info("BLOCK* processReport: "
             + "discarded non-initial block report from " + nodeID
             + " because namenode still in startup phase");
-        return;
+        return !node.hasStaleStorages();
       }
 
       if (storageInfo.numBlocks() == 0) {
@@ -1674,7 +1686,7 @@ public class BlockManager {
       storageInfo.receivedBlockReport();
       if (staleBefore && !storageInfo.areBlockContentsStale()) {
         LOG.info("BLOCK* processReport: Received first block report from "
-            + node + " after starting up or becoming active. Its block "
+            + storage + " after starting up or becoming active. Its block "
             + "contents are no longer considered stale");
         rescanPostponedMisreplicatedBlocks();
       }
@@ -1689,9 +1701,10 @@ public class BlockManager {
     if (metrics != null) {
       metrics.addBlockReport((int) (endTime - startTime));
     }
-    blockLog.info("BLOCK* processReport: from "
-        + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
+    blockLog.info("BLOCK* processReport: from storage " + storage.getStorageID()
+        + " node " + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
         + ", processing time: " + (endTime - startTime) + " msecs");
+    return !node.hasStaleStorages();
   }
 
   /**
@@ -1846,7 +1859,7 @@ public class BlockManager {
       Collection<BlockToMarkCorrupt> toCorrupt, // add to corrupt replicas list
       Collection<StatefulBlockInfo> toUC) { // add to under-construction list
 
-    final DatanodeStorageInfo storageInfo = dn.updateStorage(storage);
+    final DatanodeStorageInfo storageInfo = dn.getStorageInfo(storage.getStorageID());
 
     // place a delimiter in the list which separates blocks 
     // that have been reported from those that have not

+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java

@@ -257,6 +257,17 @@ public class DatanodeDescriptor extends DatanodeInfo {
     }
   }
 
+  boolean hasStaleStorages() {
+    synchronized (storageMap) {
+      for (DatanodeStorageInfo storage : storageMap.values()) {
+        if (storage.areBlockContentsStale()) {
+          return true;
+        }
+      }
+      return false;
+    }
+  }
+
   /**
    * Remove block from the list of blocks belonging to the data-node. Remove
    * data-node from the block.

+ 94 - 71
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java

@@ -22,11 +22,9 @@ import static org.apache.hadoop.util.Time.now;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.SocketTimeoutException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
+import com.google.common.base.Joiner;
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
@@ -435,75 +433,100 @@ class BPServiceActor implements Runnable {
 
   /**
   * Report the list of blocks to the Namenode
+   * @return DatanodeCommands returned by the NN. May be null.
    * @throws IOException
    */
-  DatanodeCommand blockReport() throws IOException {
+  List<DatanodeCommand> blockReport() throws IOException {
     // send block report if timer has expired.
-    DatanodeCommand cmd = null;
-    long startTime = now();
-    if (startTime - lastBlockReport > dnConf.blockReportInterval) {
-
-      // Flush any block information that precedes the block report. Otherwise
-      // we have a chance that we will miss the delHint information
-      // or we will report an RBW replica after the BlockReport already reports
-      // a FINALIZED one.
-      reportReceivedDeletedBlocks();
-
-      // Send one block report per known storage.
-
-      // Create block report
-      long brCreateStartTime = now();
-      long totalBlockCount = 0;
-
-      Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
-          dn.getFSDataset().getBlockReports(bpos.getBlockPoolId());
-
-      // Send block report
-      long brSendStartTime = now();
-      StorageBlockReport[] reports =
-          new StorageBlockReport[perVolumeBlockLists.size()];
-
-      int i = 0;
-      for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
-        DatanodeStorage dnStorage = kvPair.getKey();
-        BlockListAsLongs blockList = kvPair.getValue();
-        totalBlockCount += blockList.getNumberOfBlocks();
-
-        reports[i++] =
-            new StorageBlockReport(
-              dnStorage, blockList.getBlockListAsLongs());
-      }
+    final long startTime = now();
+    if (startTime - lastBlockReport <= dnConf.blockReportInterval) {
+      return null;
+    }
 
-      cmd = bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), reports);
-
-      // Log the block report processing stats from Datanode perspective
-      long brSendCost = now() - brSendStartTime;
-      long brCreateCost = brSendStartTime - brCreateStartTime;
-      dn.getMetrics().addBlockReport(brSendCost);
-      LOG.info("BlockReport of " + totalBlockCount
-          + " blocks took " + brCreateCost + " msec to generate and "
-          + brSendCost + " msecs for RPC and NN processing");
-
-      // If we have sent the first block report, then wait a random
-      // time before we start the periodic block reports.
-      if (resetBlockReportTime) {
-        lastBlockReport = startTime - DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval));
-        resetBlockReportTime = false;
-      } else {
-        /* say the last block report was at 8:20:14. The current report
-         * should have started around 9:20:14 (default 1 hour interval).
-         * If current time is :
-         *   1) normal like 9:20:18, next report should be at 10:20:14
-         *   2) unexpected like 11:35:43, next report should be at 12:20:14
-         */
-        lastBlockReport += (now() - lastBlockReport) /
-        dnConf.blockReportInterval * dnConf.blockReportInterval;
+    ArrayList<DatanodeCommand> cmds = new ArrayList<DatanodeCommand>();
+
+    // Flush any block information that precedes the block report. Otherwise
+    // we have a chance that we will miss the delHint information
+    // or we will report an RBW replica after the BlockReport already reports
+    // a FINALIZED one.
+    reportReceivedDeletedBlocks();
+    lastDeletedReport = startTime;
+
+    long brCreateStartTime = now();
+    Map<DatanodeStorage, BlockListAsLongs> perVolumeBlockLists =
+        dn.getFSDataset().getBlockReports(bpos.getBlockPoolId());
+
+    // Convert the reports to the format expected by the NN.
+    int i = 0;
+    int totalBlockCount = 0;
+    StorageBlockReport reports[] =
+        new StorageBlockReport[perVolumeBlockLists.size()];
+
+    for(Map.Entry<DatanodeStorage, BlockListAsLongs> kvPair : perVolumeBlockLists.entrySet()) {
+      BlockListAsLongs blockList = kvPair.getValue();
+      reports[i++] = new StorageBlockReport(
+          kvPair.getKey(), blockList.getBlockListAsLongs());
+      totalBlockCount += blockList.getNumberOfBlocks();
+    }
+
+    // Send the reports to the NN.
+    int numReportsSent;
+    long brSendStartTime = now();
+    if (totalBlockCount < dnConf.blockReportSplitThreshold) {
+      // Below split threshold, send all reports in a single message.
+      numReportsSent = 1;
+      DatanodeCommand cmd =
+          bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), reports);
+      if (cmd != null) {
+        cmds.add(cmd);
+      }
+    } else {
+      // Send one block report per message.
+      numReportsSent = i;
+      for (StorageBlockReport report : reports) {
+        StorageBlockReport singleReport[] = { report };
+        DatanodeCommand cmd = bpNamenode.blockReport(
+            bpRegistration, bpos.getBlockPoolId(), singleReport);
+        if (cmd != null) {
+          cmds.add(cmd);
+        }
       }
-      LOG.info("sent block report, processed command:" + cmd);
     }
-    return cmd;
+
+    // Log the block report processing stats from Datanode perspective
+    long brSendCost = now() - brSendStartTime;
+    long brCreateCost = brSendStartTime - brCreateStartTime;
+    dn.getMetrics().addBlockReport(brSendCost);
+    LOG.info("Sent " + numReportsSent + " blockreports containing " +
+        totalBlockCount + " blocks total. Took " + brCreateCost +
+        " msec to generate and " + brSendCost +
+        " msecs for RPC and NN processing. Got back commands: " +
+        (cmds.size() == 0 ? "none" : Joiner.on("; ").join(cmds)));
+
+    scheduleNextBlockReport(startTime);
+    return cmds.size() == 0 ? null : cmds;
+  }
+
+  private void scheduleNextBlockReport(long previousReportStartTime) {
+    // If we have sent the first set of block reports, then wait a random
+    // time before we start the periodic block reports.
+    if (resetBlockReportTime) {
+      lastBlockReport = previousReportStartTime -
+          DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval));
+      resetBlockReportTime = false;
+    } else {
+      /* say the last block report was at 8:20:14. The current report
+       * should have started around 9:20:14 (default 1 hour interval).
+       * If current time is :
+       *   1) normal like 9:20:18, next report should be at 10:20:14
+       *   2) unexpected like 11:35:43, next report should be at 12:20:14
+       */
+      lastBlockReport += (now() - lastBlockReport) /
+          dnConf.blockReportInterval * dnConf.blockReportInterval;
+    }
   }
-  
+
   DatanodeCommand cacheReport() throws IOException {
     // If caching is disabled, do not send a cache report
     if (dn.getFSDataset().getCacheCapacity() == 0) {
@@ -511,7 +534,7 @@ class BPServiceActor implements Runnable {
     }
     // send cache report if timer has expired.
     DatanodeCommand cmd = null;
-    long startTime = Time.monotonicNow();
+    final long startTime = Time.monotonicNow();
     if (startTime - lastCacheReport > dnConf.cacheReportInterval) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Sending cacheReport from service actor: " + this);
@@ -611,7 +634,7 @@ class BPServiceActor implements Runnable {
     //
     while (shouldRun()) {
       try {
-        long startTime = now();
+        final long startTime = now();
 
         //
         // Every so often, send heartbeat or block-report
@@ -657,10 +680,10 @@ class BPServiceActor implements Runnable {
           lastDeletedReport = startTime;
         }
 
-        DatanodeCommand cmd = blockReport();
-        processCommand(new DatanodeCommand[]{ cmd });
+        List<DatanodeCommand> cmds = blockReport();
+        processCommand(cmds == null ? null : cmds.toArray(new DatanodeCommand[cmds.size()]));
 
-        cmd = cacheReport();
+        DatanodeCommand cmd = cacheReport();
         processCommand(new DatanodeCommand[]{ cmd });
 
         // Now safe to start scanning the block pool.

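Concretely, with the default dfs.blockreport.split.threshold of 1,000,000: a DataNode with three storages of 200,000 blocks each (600,000 total) stays under the threshold and sends all three StorageBlockReports in a single blockReport RPC, while three storages of 400,000 blocks each (1,200,000 total) cross it and produce three separate RPCs, one per storage. Setting the threshold to zero always splits.
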
+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java

@@ -23,6 +23,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY;
@@ -70,6 +72,7 @@ public class DNConf {
   final long readaheadLength;
   final long heartBeatInterval;
   final long blockReportInterval;
+  final long blockReportSplitThreshold;
   final long deleteReportInterval;
   final long initialBlockReportDelay;
   final long cacheReportInterval;
@@ -117,6 +120,8 @@ public class DNConf {
         DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
     this.blockReportInterval = conf.getLong(DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
         DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT);
+    this.blockReportSplitThreshold = conf.getLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY,
+                                            DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT);
     this.cacheReportInterval = conf.getLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY,
         DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT);
     

+ 25 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -36,6 +36,7 @@ import java.net.SocketTimeoutException;
 import java.net.URI;
 import java.net.UnknownHostException;
 import java.nio.channels.ClosedByInterruptException;
+import java.nio.channels.ClosedChannelException;
 import java.nio.channels.SocketChannel;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -51,7 +52,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 import javax.management.ObjectName;
 
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -1194,7 +1194,7 @@ public class DataNode extends Configured
   
   private void checkBlockToken(ExtendedBlock block, Token<BlockTokenIdentifier> token,
       AccessMode accessMode) throws IOException {
-    if (isBlockTokenEnabled && UserGroupInformation.isSecurityEnabled()) {
+    if (isBlockTokenEnabled) {
       BlockTokenIdentifier id = new BlockTokenIdentifier();
       ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier());
       DataInputStream in = new DataInputStream(buf);
@@ -1324,12 +1324,7 @@ public class DataNode extends Configured
   protected void checkDiskError(Exception e ) throws IOException {
     
     LOG.warn("checkDiskError: exception: ", e);
-    if (e instanceof SocketException || e instanceof SocketTimeoutException
-    	  || e instanceof ClosedByInterruptException 
-    	  || e.getMessage().startsWith("An established connection was aborted")
-    	  || e.getMessage().startsWith("Broken pipe")
-    	  || e.getMessage().startsWith("Connection reset")
-    	  || e.getMessage().contains("java.nio.channels.SocketChannel")) {
+    if (isNetworkRelatedException(e)) {
       LOG.info("Not checking disk as checkDiskError was called on a network" +
       		" related exception");	
       return;
@@ -1342,6 +1337,28 @@ public class DataNode extends Configured
     }
   }
   
+  /**
+   * Check if the provided exception looks like it's from a network error
+   * @param e the exception from a checkDiskError call
+   * @return true if this exception is network related, false otherwise
+   */
+  protected boolean isNetworkRelatedException(Exception e) {
+    if (e instanceof SocketException 
+        || e instanceof SocketTimeoutException
+        || e instanceof ClosedChannelException 
+        || e instanceof ClosedByInterruptException) {
+      return true;
+    }
+    
+    String msg = e.getMessage();
+    
+    return null != msg 
+        && (msg.startsWith("An established connection was aborted")
+            || msg.startsWith("Broken pipe")
+            || msg.startsWith("Connection reset")
+            || msg.contains("java.nio.channels.SocketChannel"));
+  }
+  
   /**
    *  Check if there is a disk failure and if so, handle the error
    */

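A quick illustration of the new classification (a hypothetical snippet, e.g. from test code in the same package; dn is an assumed DataNode instance, and the method is not part of this patch's tests):

    // Network-related failures skip the disk check entirely:
    assert dn.isNetworkRelatedException(new SocketTimeoutException());
    assert dn.isNetworkRelatedException(new ClosedChannelException());
    assert dn.isNetworkRelatedException(new IOException("Broken pipe"));
    // An exception with a null message no longer risks an NPE (HDFS-5856);
    // it simply falls through to the real disk check:
    assert !dn.isNetworkRelatedException(new IOException());
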
+ 7 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java

@@ -998,13 +998,18 @@ class NameNodeRpcServer implements NamenodeProtocols {
            + "from " + nodeReg + ", reports.length=" + reports.length);
     }
     final BlockManager bm = namesystem.getBlockManager(); 
+    boolean hasStaleStorages = true;
     for(StorageBlockReport r : reports) {
       final BlockListAsLongs blocks = new BlockListAsLongs(r.getBlocks());
-      bm.processReport(nodeReg, r.getStorage(), poolId, blocks);
+      hasStaleStorages = bm.processReport(nodeReg, r.getStorage(), poolId, blocks);
     }
 
-    if (nn.getFSImage().isUpgradeFinalized() && !nn.isStandbyState())
+    if (nn.getFSImage().isUpgradeFinalized() &&
+        !nn.isStandbyState() &&
+        !hasStaleStorages) {
       return new FinalizeCommand(poolId);
+    }
+
     return null;
   }
 

+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -482,6 +482,20 @@
   <description>Delay for first block report in seconds.</description>
 </property>
 
+<property>
+  <name>dfs.blockreport.split.threshold</name>
+  <value>1000000</value>
+  <description>If the number of blocks on the DataNode is below this
+  threshold then it will send block reports for all Storage Directories
+  in a single message.
+
+  If the number of blocks exceeds this threshold then the DataNode will
+  send block reports for each Storage Directory in separate messages.
+
+  Set to zero to always split.
+  </description>
+</property>
+
 <property>
   <name>dfs.datanode.directoryscan.interval</name>
   <value>21600</value>

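The same knob is exposed programmatically through the new DFSConfigKeys constant; a trivial sketch (the value shown is just an example):

    Configuration conf = new Configuration();
    // Force one blockReport RPC per storage directory regardless of block count.
    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, 0L);
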
+ 104 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/client/TestShortCircuitSharedMemorySegment.java

@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.client;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+
+import org.apache.commons.lang.SystemUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.io.nativeio.NativeIO;
+import org.apache.hadoop.io.nativeio.SharedFileDescriptorFactory;
+import org.apache.hadoop.hdfs.client.ShortCircuitSharedMemorySegment.Slot;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.Assert;
+
+public class TestShortCircuitSharedMemorySegment {
+  public static final Log LOG =
+      LogFactory.getLog(TestShortCircuitSharedMemorySegment.class);
+  
+  private static final File TEST_BASE =
+      new File(System.getProperty("test.build.data", "/tmp"));
+
+  @Before
+  public void before() {
+    Assume.assumeTrue(NativeIO.isAvailable());
+    Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
+  }
+
+  @Test(timeout=60000)
+  public void testStartupShutdown() throws Exception {
+    File path = new File(TEST_BASE, "testStartupShutdown");
+    path.mkdirs();
+    SharedFileDescriptorFactory factory =
+        new SharedFileDescriptorFactory("shm_", path.getAbsolutePath());
+    FileInputStream stream = factory.createDescriptor(4096);
+    ShortCircuitSharedMemorySegment shm = 
+        new ShortCircuitSharedMemorySegment(stream);
+    shm.close();
+    stream.close();
+    FileUtil.fullyDelete(path);
+  }
+
+  @Test(timeout=60000)
+  public void testAllocateSlots() throws Exception {
+    File path = new File(TEST_BASE, "testAllocateSlots");
+    path.mkdirs();
+    SharedFileDescriptorFactory factory =
+        new SharedFileDescriptorFactory("shm_", path.getAbsolutePath());
+    FileInputStream stream = factory.createDescriptor(4096);
+    ShortCircuitSharedMemorySegment shm = 
+        new ShortCircuitSharedMemorySegment(stream);
+    int numSlots = 0;
+    ArrayList<Slot> slots = new ArrayList<Slot>();
+    while (true) {
+      Slot slot = shm.allocateNextSlot();
+      if (slot == null) {
+        LOG.info("allocated " + numSlots + " slots before running out.");
+        break;
+      }
+      slots.add(slot);
+      numSlots++;
+    }
+    int slotIdx = 0;
+    for (Slot slot : slots) {
+      Assert.assertFalse(slot.addAnchor());
+      Assert.assertEquals(slotIdx++, slot.getIndex());
+    }
+    for (Slot slot : slots) {
+      slot.makeAnchorable();
+    }
+    for (Slot slot : slots) {
+      Assert.assertTrue(slot.addAnchor());
+    }
+    for (Slot slot : slots) {
+      slot.removeAnchor();
+    }
+    shm.close();
+    for (Slot slot : slots) {
+      slot.close();
+    }
+    stream.close();
+    FileUtil.fullyDelete(path);
+  }
+}

+ 71 - 165
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java → hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/BlockReportTestBase.java

@@ -52,7 +52,6 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
-import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
@@ -69,20 +68,16 @@ import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 
 /**
- * This test simulates a variety of situations when blocks are being
- * intentionally corrupted, unexpectedly modified, and so on before a block
- * report is happening.
+ * This is the base class for simulating a variety of situations
+ * when blocks are being intentionally corrupted, unexpectedly modified,
+ * and so on before a block report is happening.
  *
- * For each test case it runs two variations:
- *  #1 - For a given DN, the first variation sends block reports for all
- *       storages in a single call to the NN.
- *  #2 - For a given DN, the second variation sends block reports for each
- *       storage in a separate call.
- *
- * The behavior should be the same in either variation.
+ * By overriding {@link #sendBlockReports}, derived classes can test
+ * different variations of how block reports are split across storages
+ * and messages.
  */
-public class TestBlockReport {
-  public static final Log LOG = LogFactory.getLog(TestBlockReport.class);
+public abstract class BlockReportTestBase {
+  public static final Log LOG = LogFactory.getLog(BlockReportTestBase.class);
 
   private static short REPL_FACTOR = 1;
   private static final int RAND_LIMIT = 2000;
@@ -91,12 +86,11 @@ public class TestBlockReport {
   private static final int DN_N0 = 0;
   private static final int FILE_START = 0;
 
-  static final int BLOCK_SIZE = 1024;
-  static final int NUM_BLOCKS = 10;
-  static final int FILE_SIZE = NUM_BLOCKS * BLOCK_SIZE + 1;
-  static String bpid;
+  private static final int BLOCK_SIZE = 1024;
+  private static final int NUM_BLOCKS = 10;
+  private static final int FILE_SIZE = NUM_BLOCKS * BLOCK_SIZE + 1;
 
-  private MiniDFSCluster cluster;
+  protected MiniDFSCluster cluster;
   private DistributedFileSystem fs;
 
   private static Random rand = new Random(RAND_LIMIT);
@@ -112,8 +106,7 @@ public class TestBlockReport {
   public void startUpCluster() throws IOException {
     REPL_FACTOR = 1; // Reset in case a test has modified the value
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL_FACTOR).build();
-    fs = (DistributedFileSystem) cluster.getFileSystem();
-    bpid = cluster.getNamesystem().getBlockPoolId();
+    fs = cluster.getFileSystem();
   }
 
   @After
@@ -123,6 +116,15 @@ public class TestBlockReport {
     cluster.shutdown();
   }
 
+  protected static void resetConfiguration() {
+    conf = new Configuration();
+    int customPerChecksumSize = 512;
+    int customBlockSize = customPerChecksumSize * 3;
+    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
+    conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DN_RESCAN_INTERVAL);
+  }
+
   // Generate a block report, optionally corrupting the generation
   // stamp and/or length of one block.
   private static StorageBlockReport[] getBlockReports(
@@ -172,106 +174,11 @@ public class TestBlockReport {
    * @param dnR
    * @param poolId
    * @param reports
-   * @param needtoSplit
    * @throws IOException
    */
-  private void sendBlockReports(DatanodeRegistration dnR, String poolId,
-      StorageBlockReport[] reports, boolean needtoSplit) throws IOException {
-    if (!needtoSplit) {
-      LOG.info("Sending combined block reports for " + dnR);
-      cluster.getNameNodeRpc().blockReport(dnR, poolId, reports);
-    } else {
-      for (StorageBlockReport report : reports) {
-        LOG.info("Sending block report for storage " + report.getStorage().getStorageID());
-        StorageBlockReport[] singletonReport = { report };
-        cluster.getNameNodeRpc().blockReport(dnR, poolId, singletonReport);
-      }
-    }
-  }
+  protected abstract void sendBlockReports(DatanodeRegistration dnR, String poolId,
+      StorageBlockReport[] reports) throws IOException;
 
-  /**
-   * Test variations blockReport_01 through blockReport_09 with combined
-   * and split block reports.
-   */
-  @Test
-  public void blockReportCombined_01() throws IOException {
-    blockReport_01(false);
-  }
-
-  @Test
-  public void blockReportSplit_01() throws IOException {
-    blockReport_01(true);
-  }
-
-  @Test
-  public void blockReportCombined_02() throws IOException {
-    blockReport_02(false);
-  }
-
-  @Test
-  public void blockReportSplit_02() throws IOException {
-    blockReport_02(true);
-  }
-
-  @Test
-  public void blockReportCombined_03() throws IOException {
-    blockReport_03(false);
-  }
-
-  @Test
-  public void blockReportSplit_03() throws IOException {
-    blockReport_03(true);
-  }
-
-  @Test
-  public void blockReportCombined_04() throws IOException {
-    blockReport_04(false);
-  }
-
-  @Test
-  public void blockReportSplit_04() throws IOException {
-    blockReport_04(true);
-  }
-
-  @Test
-  public void blockReportCombined_06() throws Exception {
-    blockReport_06(false);
-  }
-
-  @Test
-  public void blockReportSplit_06() throws Exception {
-    blockReport_06(true);
-  }
-
-  @Test
-  public void blockReportCombined_07() throws Exception {
-    blockReport_07(false);
-  }
-
-  @Test
-  public void blockReportSplit_07() throws Exception {
-    blockReport_07(true);
-  }
-
-  @Test
-  public void blockReportCombined_08() throws Exception {
-    blockReport_08(false);
-  }
-
-  @Test
-  public void blockReportSplit_08() throws Exception {
-    blockReport_08(true);
-  }
-
-  @Test
-  public void blockReportCombined_09() throws Exception {
-    blockReport_09(false);
-  }
-
-  @Test
-  public void blockReportSplit_09() throws Exception {
-    blockReport_09(true);
-  }
   /**
    * Test write a file, verifies and closes it. Then the length of the blocks
    * are messed up and BlockReport is forced.
@@ -279,7 +186,8 @@ public class TestBlockReport {
    *
    * @throws java.io.IOException on an error
    */
-  private void blockReport_01(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_01() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
 
@@ -312,7 +220,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
 
     List<LocatedBlock> blocksAfterReport =
       DFSTestUtil.getAllBlocks(fs.open(filePath));
@@ -338,7 +246,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of errors
    */
-  private void blockReport_02(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_02() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     LOG.info("Running test " + METHOD_NAME);
 
@@ -393,7 +302,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn0.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn0, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
 
     BlockManagerTestUtil.getComputedDatanodeWork(cluster.getNamesystem()
         .getBlockManager());
@@ -414,17 +323,18 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_03(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_03() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     ArrayList<Block> blocks = writeFile(METHOD_NAME, FILE_SIZE, filePath);
-    
+
     // all blocks belong to the same file, hence same BP
     DataNode dn = cluster.getDataNodes().get(DN_N0);
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
     assertThat("Wrong number of corrupt blocks",
@@ -441,7 +351,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_04(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_04() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     DFSTestUtil.createFile(fs, filePath,
@@ -459,7 +370,7 @@ public class TestBlockReport {
 
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
     assertThat("Wrong number of corrupt blocks",
@@ -476,7 +387,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_06(boolean splitBlockReports) throws Exception {
+  @Test(timeout=300000)
+  public void blockReport_06() throws Exception {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -489,7 +401,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
     assertEquals("Wrong number of PendingReplication Blocks",
       0, cluster.getNamesystem().getUnderReplicatedBlocks());
@@ -508,7 +420,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_07(boolean splitBlockReports) throws Exception {
+  @Test(timeout=300000)
+  public void blockReport_07() throws Exception {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -522,7 +435,7 @@ public class TestBlockReport {
     String poolId = cluster.getNamesystem().getBlockPoolId();
     DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
     StorageBlockReport[] reports = getBlockReports(dn, poolId, true, false);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
     assertThat("Wrong number of corrupt blocks",
@@ -533,7 +446,7 @@ public class TestBlockReport {
                cluster.getNamesystem().getPendingReplicationBlocks(), is(0L));
 
     reports = getBlockReports(dn, poolId, true, true);
-    sendBlockReports(dnR, poolId, reports, splitBlockReports);
+    sendBlockReports(dnR, poolId, reports);
     printStats();
 
     assertThat("Wrong number of corrupt blocks",
@@ -559,7 +472,8 @@ public class TestBlockReport {
    *
    * @throws IOException in case of an error
    */
-  private void blockReport_08(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_08() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -578,13 +492,13 @@ public class TestBlockReport {
       bc.start();
 
       waitForTempReplica(bl, DN_N1);
-      
+
       // all blocks belong to the same file, hence same BP
       DataNode dn = cluster.getDataNodes().get(DN_N1);
       String poolId = cluster.getNamesystem().getBlockPoolId();
       DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
       StorageBlockReport[] reports = getBlockReports(dn, poolId, false, false);
-      sendBlockReports(dnR, poolId, reports, splitBlockReports);
+      sendBlockReports(dnR, poolId, reports);
       printStats();
       assertEquals("Wrong number of PendingReplication blocks",
         blocks.size(), cluster.getNamesystem().getPendingReplicationBlocks());
@@ -600,7 +514,8 @@ public class TestBlockReport {
   // Similar to BlockReport_08 but corrupts GS and len of the TEMPORARY's
   // replica block. Expect the same behaviour: NN should simply ignore this
   // block
-  private void blockReport_09(boolean splitBlockReports) throws IOException {
+  @Test(timeout=300000)
+  public void blockReport_09() throws IOException {
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
     final int DN_N1 = DN_N0 + 1;
@@ -620,17 +535,17 @@ public class TestBlockReport {
       bc.start();
 
       waitForTempReplica(bl, DN_N1);
-                                                
+
       // all blocks belong to the same file, hence same BP
       DataNode dn = cluster.getDataNodes().get(DN_N1);
       String poolId = cluster.getNamesystem().getBlockPoolId();
       DatanodeRegistration dnR = dn.getDNRegistrationForBP(poolId);
       StorageBlockReport[] reports = getBlockReports(dn, poolId, true, true);
-      sendBlockReports(dnR, poolId, reports, splitBlockReports);
+      sendBlockReports(dnR, poolId, reports);
       printStats();
       assertEquals("Wrong number of PendingReplication blocks",
         2, cluster.getNamesystem().getPendingReplicationBlocks());
-      
+
       try {
         bc.join();
       } catch (InterruptedException e) {}
@@ -638,7 +553,7 @@ public class TestBlockReport {
       resetConfiguration(); // return the initial state of the configuration
     }
   }
-  
+
   /**
    * Test for the case where one of the DNs in the pipeline is in the
    * process of doing a block report exactly when the block is closed.
@@ -648,7 +563,7 @@ public class TestBlockReport {
    * corrupt.
    * This is a regression test for HDFS-2791.
    */
-  @Test
+  @Test(timeout=300000)
   public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
     final CountDownLatch brFinished = new CountDownLatch(1);
     DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
@@ -663,7 +578,7 @@ public class TestBlockReport {
         }
       }
     };
-    
+
     final String METHOD_NAME = GenericTestUtils.getMethodName();
     Path filePath = new Path("/" + METHOD_NAME + ".dat");
 
@@ -671,9 +586,9 @@ public class TestBlockReport {
     // what happens when one of the DNs is slowed for some reason.
     REPL_FACTOR = 2;
     startDNandWait(null, false);
-    
+
     NameNode nn = cluster.getNameNode();
-    
+
     FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
     try {
       AppendTestUtil.write(out, 0, 10);
@@ -684,19 +599,19 @@ public class TestBlockReport {
       DataNode dn = cluster.getDataNodes().get(0);
       DatanodeProtocolClientSideTranslatorPB spy =
         DataNodeTestUtils.spyOnBposToNN(dn, nn);
-      
+
       Mockito.doAnswer(delayer)
         .when(spy).blockReport(
           Mockito.<DatanodeRegistration>anyObject(),
           Mockito.anyString(),
           Mockito.<StorageBlockReport[]>anyObject());
-      
+
       // Force a block report to be generated. The block report will have
       // an RBW replica in it. Wait for the RPC to be sent, but block
       // it before it gets to the NN.
       dn.scheduleAllBlockReport(0);
       delayer.waitForCall();
-      
+
     } finally {
       IOUtils.closeStream(out);
     }
@@ -705,22 +620,22 @@ public class TestBlockReport {
     // state.
     delayer.proceed();
     brFinished.await();
-    
+
     // Verify that no replicas are marked corrupt, and that the
     // file is still readable.
     BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
     assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
     DFSTestUtil.readFile(fs, filePath);
-    
+
     // Ensure that the file is readable even from the DN that we futzed with.
     cluster.stopDataNode(1);
-    DFSTestUtil.readFile(fs, filePath);    
+    DFSTestUtil.readFile(fs, filePath);
   }
 
   private void waitForTempReplica(Block bl, int DN_N1) throws IOException {
     final boolean tooLongWait = false;
     final int TIMEOUT = 40000;
-    
+
     if(LOG.isDebugEnabled()) {
       LOG.debug("Wait for datanode " + DN_N1 + " to appear");
     }
@@ -731,7 +646,7 @@ public class TestBlockReport {
       LOG.debug("Total number of DNs " + cluster.getDataNodes().size());
     }
     cluster.waitActive();
-    
+
     // Look about specified DN for the replica of the block from 1st DN
     final DataNode dn1 = cluster.getDataNodes().get(DN_N1);
     String bpid = cluster.getNamesystem().getBlockPoolId();
@@ -789,7 +704,7 @@ public class TestBlockReport {
     return blocks;
   }
 
-  private void startDNandWait(Path filePath, boolean waitReplicas) 
+  private void startDNandWait(Path filePath, boolean waitReplicas)
       throws IOException, InterruptedException, TimeoutException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Before next DN start: " + cluster.getDataNodes().size());
@@ -802,7 +717,7 @@ public class TestBlockReport {
     if (LOG.isDebugEnabled()) {
       int lastDn = datanodes.size() - 1;
       LOG.debug("New datanode "
-          + cluster.getDataNodes().get(lastDn).getDisplayName() 
+          + cluster.getDataNodes().get(lastDn).getDisplayName()
           + " has been started");
     }
     if (waitReplicas) {
@@ -898,7 +813,7 @@ public class TestBlockReport {
     ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL);
     ((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL);
     ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL);
-    ((Log4JLogger) TestBlockReport.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger) BlockReportTestBase.LOG).getLogger().setLevel(Level.ALL);
   }
 
   private Block findBlock(Path path, long size) throws IOException {
@@ -918,11 +833,11 @@ public class TestBlockReport {
 
   private class BlockChecker extends Thread {
     Path filePath;
-    
+
     public BlockChecker(final Path filePath) {
       this.filePath = filePath;
     }
-    
+
     @Override
     public void run() {
       try {
@@ -933,13 +848,4 @@ public class TestBlockReport {
       }
     }
   }
-
-  private static void resetConfiguration() {
-    conf = new Configuration();
-    int customPerChecksumSize = 512;
-    int customBlockSize = customPerChecksumSize * 3;
-    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
-    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
-    conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DN_RESCAN_INTERVAL);
-  }
 }

+ 16 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java

@@ -18,12 +18,16 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.DataOutputStream;
 import java.io.File;
 import java.net.InetSocketAddress;
 import java.net.Socket;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.nio.channels.ClosedChannelException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -196,4 +200,16 @@ public class TestDiskError {
       }
     }
   }
+  
+  @Test
+  public void testNetworkErrorsIgnored() {
+    DataNode dn = cluster.getDataNodes().iterator().next();
+    
+    assertTrue(dn.isNetworkRelatedException(new SocketException()));
+    assertTrue(dn.isNetworkRelatedException(new SocketTimeoutException()));
+    assertTrue(dn.isNetworkRelatedException(new ClosedChannelException()));
+    assertTrue(dn.isNetworkRelatedException(new Exception("Broken pipe foo bar")));
+    assertFalse(dn.isNetworkRelatedException(new Exception()));
+    assertFalse(dn.isNetworkRelatedException(new Exception("random problem")));
+  }
 }

+ 205 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java

@@ -0,0 +1,205 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.*;
+import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
+import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY;
+import org.apache.hadoop.test.GenericTestUtils;
+
+import org.junit.After;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Matchers.*;
+import static org.mockito.Mockito.times;
+
+/**
+ * Tests that the DataNode respects
+ * {@link DFSConfigKeys#DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY}
+ */
+public class TestDnRespectsBlockReportSplitThreshold {
+  public static final Log LOG =
+      LogFactory.getLog(TestDnRespectsBlockReportSplitThreshold.class);
+
+  private static final int BLOCK_SIZE = 1024;
+  private static final short REPL_FACTOR = 1;
+  private static final long seed = 0xFEEDFACE;
+  private static final int BLOCKS_IN_FILE = 5;
+
+  private static Configuration conf;
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem fs;
+  static String bpid;
+
+  public void startUpCluster(long splitThreshold) throws IOException {
+    conf = new HdfsConfiguration();
+    conf.setLong(DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, splitThreshold);
+    cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(REPL_FACTOR)
+        .build();
+    fs = cluster.getFileSystem();
+    bpid = cluster.getNamesystem().getBlockPoolId();
+  }
+
+  @After
+  public void shutDownCluster() throws IOException {
+    if (cluster != null) {
+      fs.close();
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  private void createFile(String filenamePrefix, int blockCount)
+      throws IOException {
+    Path path = new Path("/" + filenamePrefix + ".dat");
+    DFSTestUtil.createFile(fs, path, BLOCK_SIZE,
+        blockCount * BLOCK_SIZE, BLOCK_SIZE, REPL_FACTOR, seed);
+  }
+
+  private void verifyCapturedArguments(
+      ArgumentCaptor<StorageBlockReport[]> captor,
+      int expectedReportsPerCall,
+      int expectedTotalBlockCount) {
+
+    List<StorageBlockReport[]> listOfReports = captor.getAllValues();
+    int numBlocksReported = 0;
+    for (StorageBlockReport[] reports : listOfReports) {
+      assertThat(reports.length, is(expectedReportsPerCall));
+
+      for (StorageBlockReport report : reports) {
+        BlockListAsLongs blockList = new BlockListAsLongs(report.getBlocks());
+        numBlocksReported += blockList.getNumberOfBlocks();
+      }
+    }
+
+    assertThat(numBlocksReported >= expectedTotalBlockCount, is(true));
+  }
+
+  /**
+   * Test that if splitThreshold is zero, then we always get a separate
+   * call per storage.
+   */
+  @Test(timeout=300000)
+  public void testAlwaysSplit() throws IOException, InterruptedException {
+    startUpCluster(0);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Create a file with a few blocks.
+    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a block report so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerBlockReport(dn);
+
+    ArgumentCaptor<StorageBlockReport[]> captor =
+        ArgumentCaptor.forClass(StorageBlockReport[].class);
+
+    Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport(
+        any(DatanodeRegistration.class),
+        anyString(),
+        captor.capture());
+
+    verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
+  }
+
+  /**
+   * Tests the behavior when the count of blocks is exactly one less than
+   * the threshold.
+   */
+  @Test(timeout=300000)
+  public void testCornerCaseUnderThreshold() throws IOException, InterruptedException {
+    startUpCluster(BLOCKS_IN_FILE + 1);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Create a file with a few blocks.
+    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a block report so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerBlockReport(dn);
+
+    ArgumentCaptor<StorageBlockReport[]> captor =
+        ArgumentCaptor.forClass(StorageBlockReport[].class);
+
+    Mockito.verify(nnSpy, times(1)).blockReport(
+        any(DatanodeRegistration.class),
+        anyString(),
+        captor.capture());
+
+    verifyCapturedArguments(captor, MiniDFSCluster.DIRS_PER_DATANODE, BLOCKS_IN_FILE);
+  }
+
+  /**
+   * Tests the behavior when the count of blocks is exactly equal to the
+   * threshold.
+   */
+  @Test(timeout=300000)
+  public void testCornerCaseAtThreshold() throws IOException, InterruptedException {
+    startUpCluster(BLOCKS_IN_FILE);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Create a file with a few blocks.
+    createFile(GenericTestUtils.getMethodName(), BLOCKS_IN_FILE);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a block report so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerBlockReport(dn);
+
+    ArgumentCaptor<StorageBlockReport[]> captor =
+        ArgumentCaptor.forClass(StorageBlockReport[].class);
+
+    Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport(
+        any(DatanodeRegistration.class),
+        anyString(),
+        captor.capture());
+
+    verifyCapturedArguments(captor, 1, BLOCKS_IN_FILE);
+  }
+
+}

+ 42 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesBlockReportPerStorage.java

@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+
+
+/**
+ * Runs all tests in BlockReportTestBase, sending one block report per storage.
+ * This is the default DataNode behavior post HDFS-2832.
+ */
+public class TestNNHandlesBlockReportPerStorage extends BlockReportTestBase {
+
+  @Override
+  protected void sendBlockReports(DatanodeRegistration dnR, String poolId,
+      StorageBlockReport[] reports) throws IOException {
+    for (StorageBlockReport report : reports) {
+      LOG.info("Sending block report for storage " + report.getStorage().getStorageID());
+      StorageBlockReport[] singletonReport = { report };
+      cluster.getNameNodeRpc().blockReport(dnR, poolId, singletonReport);
+    }
+  }
+}

+ 39 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestNNHandlesCombinedBlockReport.java

@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
+
+/**
+ * Runs all tests in BlockReportTestBase, sending one block report
+ * per DataNode. This tests that the NN can handle the legacy DN
+ * behavior where it presents itself as a single logical storage.
+ */
+public class TestNNHandlesCombinedBlockReport extends BlockReportTestBase {
+
+  @Override
+  protected void sendBlockReports(DatanodeRegistration dnR, String poolId,
+                                  StorageBlockReport[] reports) throws IOException {
+    LOG.info("Sending combined block reports for " + dnR);
+    cluster.getNameNodeRpc().blockReport(dnR, poolId, reports);
+  }
+}

File diff suppressed because it is too large
+ 168 - 168
hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json


+ 7 - 0
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.api.records.QueueInfo;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -865,4 +866,10 @@ public class ResourceSchedulerWrapper implements
   public RMContainer getRMContainer(ContainerId containerId) {
     return null;
   }
+
+  @Override
+  public String moveApplication(ApplicationId appId, String newQueue)
+      throws YarnException {
+    return scheduler.moveApplication(appId, newQueue);
+  }
 }

+ 8 - 0
hadoop-yarn-project/CHANGES.txt

@@ -12,6 +12,8 @@ Trunk - Unreleased
     YARN-1498. Common scheduler changes for moving apps between queues (Sandy
     Ryza)
 
+    YARN-1504. RM changes for moving apps between queues (Sandy Ryza)
+
   IMPROVEMENTS
 
   OPTIMIZATIONS
@@ -77,6 +79,9 @@ Release 2.4.0 - UNRELEASED
     YARN-1413. Implemented serving of aggregated-logs in the ApplicationHistory
     server. (Mayank Bansal via vinodkv)
 
+    YARN-1633. Defined user-facing entity, entity-info and event objects related
+    to Application Timeline feature. (Zhijie Shen via vinodkv)
+
   IMPROVEMENTS
 
     YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via
@@ -140,6 +145,9 @@ Release 2.4.0 - UNRELEASED
 
     YARN-1642. RMDTRenewer#getRMClient should use ClientRMProxy (kasha)
 
+    YARN-1632. TestApplicationMasterServices should be under
+    org.apache.hadoop.yarn.server.resourcemanager package (Chen He via jeagles)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 88 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEntities.java

@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records.apptimeline;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+
+/**
+ * The class that hosts a list of application timeline entities.
+ */
+@XmlRootElement(name = "entities")
+@XmlAccessorType(XmlAccessType.NONE)
+@Public
+@Unstable
+public class ATSEntities {
+
+  private List<ATSEntity> entities =
+      new ArrayList<ATSEntity>();
+
+  public ATSEntities() {
+
+  }
+
+  /**
+   * Get a list of entities
+   * 
+   * @return a list of entities
+   */
+  @XmlElement(name = "entities")
+  public List<ATSEntity> getEntities() {
+    return entities;
+  }
+
+  /**
+   * Add a single entity into the existing entity list
+   * 
+   * @param entity
+   *          a single entity
+   */
+  public void addEntity(ATSEntity entity) {
+    entities.add(entity);
+  }
+
+  /**
+   * Add a list of entities to the existing entity list
+   * 
+   * @param entities
+   *          a list of entities
+   */
+  public void addEntities(List<ATSEntity> entities) {
+    this.entities.addAll(entities);
+  }
+
+  /**
+   * Set the entity list to the given list of entities
+   * 
+   * @param entities
+   *          a list of entities
+   */
+  public void setEntities(List<ATSEntity> entities) {
+    this.entities = entities;
+  }
+
+}

+ 314 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEntity.java

@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records.apptimeline;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+
+/**
+ * <p>
+ * The class that contains the meta information of some conceptual entity of
+ * an application and its related events. The entity can be an application, an
+ * application attempt, a container or any other user-defined object.
+ * </p>
+ * 
+ * <p>
+ * Primary filters will be used to index the entities in
+ * <code>ApplicationTimelineStore</code>, so users should carefully
+ * choose the information they want to store as the primary filters. The
+ * remainder can be stored as other information.
+ * </p>
+ */
+@XmlRootElement(name = "entity")
+@XmlAccessorType(XmlAccessType.NONE)
+@Public
+@Unstable
+public class ATSEntity {
+
+  private String entityType;
+  private String entityId;
+  private long startTime;
+  private List<ATSEvent> events = new ArrayList<ATSEvent>();
+  private Map<String, List<Object>> relatedEntities =
+      new HashMap<String, List<Object>>();
+  private Map<String, Object> primaryFilters =
+      new HashMap<String, Object>();
+  private Map<String, Object> otherInfo =
+      new HashMap<String, Object>();
+
+  public ATSEntity() {
+
+  }
+
+  /**
+   * Get the entity type
+   * 
+   * @return the entity type
+   */
+  @XmlElement(name = "entitytype")
+  public String getEntityType() {
+    return entityType;
+  }
+
+  /**
+   * Set the entity type
+   * 
+   * @param entityType
+   *          the entity type
+   */
+  public void setEntityType(String entityType) {
+    this.entityType = entityType;
+  }
+
+  /**
+   * Get the entity Id
+   * 
+   * @return the entity Id
+   */
+  @XmlElement(name = "entity")
+  public String getEntityId() {
+    return entityId;
+  }
+
+  /**
+   * Set the entity Id
+   * 
+   * @param entityId
+   *          the entity Id
+   */
+  public void setEntityId(String entityId) {
+    this.entityId = entityId;
+  }
+
+  /**
+   * Get the start time of the entity
+   * 
+   * @return the start time of the entity
+   */
+  @XmlElement(name = "starttime")
+  public long getStartTime() {
+    return startTime;
+  }
+
+  /**
+   * Set the start time of the entity
+   * 
+   * @param startTime
+   *          the start time of the entity
+   */
+  public void setStartTime(long startTime) {
+    this.startTime = startTime;
+  }
+
+  /**
+   * Get a list of events related to the entity
+   * 
+   * @return a list of events related to the entity
+   */
+  @XmlElement(name = "events")
+  public List<ATSEvent> getEvents() {
+    return events;
+  }
+
+  /**
+   * Add a single event related to the entity to the existing event list
+   * 
+   * @param event
+   *          a single event related to the entity
+   */
+  public void addEvent(ATSEvent event) {
+    events.add(event);
+  }
+
+  /**
+   * Add a list of events related to the entity to the existing event list
+   * 
+   * @param events
+   *          a list of events related to the entity
+   */
+  public void addEvents(List<ATSEvent> events) {
+    this.events.addAll(events);
+  }
+
+  /**
+   * Set the event list to the given list of events related to the entity
+   * 
+   * @param events
+   *          a list of events related to the entity
+   */
+  public void setEvents(List<ATSEvent> events) {
+    this.events = events;
+  }
+
+  /**
+   * Get the related entities
+   * 
+   * @return the related entities
+   */
+  @XmlElement(name = "relatedentities")
+  public Map<String, List<Object>> getRelatedEntities() {
+    return relatedEntities;
+  }
+
+  /**
+   * Add a list of entities of the same type to the existing related entity map
+   * 
+   * @param entityType
+   *          the entity type
+   * @param entityIds
+   *          a list of entity Ids
+   */
+  public void addRelatedEntity(String entityType, List<Object> entityIds) {
+    List<Object> thisRelatedEntity = relatedEntities.get(entityType);
+    if (thisRelatedEntity == null) {
+      relatedEntities.put(entityType, entityIds);
+    } else {
+      thisRelatedEntity.addAll(entityIds);
+    }
+  }
+
+  /**
+   * Add a map of related entities to the existing related entity map
+   * 
+   * @param relatedEntities
+   *          a map of related entities
+   */
+  public void addRelatedEntities(
+      Map<String, List<Object>> relatedEntities) {
+    for (Map.Entry<String, List<Object>> relatedEntity : relatedEntities
+        .entrySet()) {
+      List<Object> thisRelatedEntity =
+          this.relatedEntities.get(relatedEntity.getKey());
+      if (thisRelatedEntity == null) {
+        this.relatedEntities.put(
+            relatedEntity.getKey(), relatedEntity.getValue());
+      } else {
+        thisRelatedEntity.addAll(relatedEntity.getValue());
+      }
+    }
+  }
+
+  /**
+   * Set the related entity map to the given map of related entities
+   * 
+   * @param relatedEntities
+   *          a map of related entities
+   */
+  public void setRelatedEntities(
+      Map<String, List<Object>> relatedEntities) {
+    this.relatedEntities = relatedEntities;
+  }
+
+  /**
+   * Get the primary filters
+   * 
+   * @return the primary filters
+   */
+  @XmlElement(name = "primaryfilters")
+  public Map<String, Object> getPrimaryFilters() {
+    return primaryFilters;
+  }
+
+  /**
+   * Add a single primary filter entry to the existing primary filter map
+   * 
+   * @param key
+   *          the primary filter key
+   * @param value
+   *          the primary filter value
+   */
+  public void addPrimaryFilter(String key, Object value) {
+    primaryFilters.put(key, value);
+  }
+
+  /**
+   * Add a map of primary filters to the existing primary filter map
+   * 
+   * @param primaryFilters
+   *          a map of primary filters
+   */
+  public void addPrimaryFilters(Map<String, Object> primaryFilters) {
+    this.primaryFilters.putAll(primaryFilters);
+  }
+
+  /**
+   * Set the primary filter map to the given map of primary filters
+   * 
+   * @param primaryFilters
+   *          a map of primary filters
+   */
+  public void setPrimaryFilters(Map<String, Object> primaryFilters) {
+    this.primaryFilters = primaryFilters;
+  }
+
+  /**
+   * Get the other information of the entity
+   * 
+   * @return the other information of the entity
+   */
+  @XmlElement(name = "otherinfo")
+  public Map<String, Object> getOtherInfo() {
+    return otherInfo;
+  }
+
+  /**
+   * Add one piece of other information of the entity to the existing other info
+   * map
+   * 
+   * @param key
+   *          the other information key
+   * @param value
+   *          the other information value
+   */
+  public void addOtherInfo(String key, Object value) {
+    this.otherInfo.put(key, value);
+  }
+
+  /**
+   * Add a map of other information of the entity to the existing other info map
+   * 
+   * @param otherInfo
+   *          a map of other information
+   */
+  public void addOtherInfo(Map<String, Object> otherInfo) {
+    this.otherInfo.putAll(otherInfo);
+  }
+
+  /**
+   * Set the other info map to the given map of other information
+   * 
+   * @param otherInfo
+   *          a map of other information
+   */
+  public void setOtherInfo(Map<String, Object> otherInfo) {
+    this.otherInfo = otherInfo;
+  }
+
+}

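To make the primary-filter guidance in the class comment concrete, here is a hedged sketch of how a caller might populate an ATSEntity using only the accessors defined above; the entity type, id, filter and info names ("MAPREDUCE_JOB", "job_1", "user", "queue") are illustrative placeholders, not constants from this patch.

    // Illustrative values only; the string literals are placeholders.
    ATSEntity entity = new ATSEntity();
    entity.setEntityType("MAPREDUCE_JOB");
    entity.setEntityId("job_1");
    entity.setStartTime(System.currentTimeMillis());
    entity.addPrimaryFilter("user", "alice");   // indexed by the timeline store
    entity.addOtherInfo("queue", "default");    // stored, but not indexed

    ATSEvent started = new ATSEvent();
    started.setEventType("JOB_STARTED");
    started.setTimestamp(System.currentTimeMillis());
    entity.addEvent(started);

    // Entities can then be batched for publishing via ATSEntities.
    ATSEntities batch = new ATSEntities();
    batch.addEntity(entity);

Whatever goes into addPrimaryFilter is what the store will index, so callers should limit it to the fields they expect to query on.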
+ 134 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEvent.java

@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records.apptimeline;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+
+/**
+ * The class that contains the information of an event that is related to some
+ * conceptual entity of an application. Users are free to define what the event
+ * means, such as starting an application, getting allocated a container,
+ * and so on.
+ */
+@XmlRootElement(name = "event")
+@XmlAccessorType(XmlAccessType.NONE)
+@Public
+@Unstable
+public class ATSEvent {
+
+  private long timestamp;
+  private String eventType;
+  private Map<String, Object> eventInfo = new HashMap<String, Object>();
+
+  public ATSEvent() {
+  }
+
+  /**
+   * Get the timestamp of the event
+   * 
+   * @return the timestamp of the event
+   */
+  @XmlElement(name = "timestamp")
+  public long getTimestamp() {
+    return timestamp;
+  }
+
+  /**
+   * Set the timestamp of the event
+   * 
+   * @param timestamp
+   *          the timestamp of the event
+   */
+  public void setTimestamp(long timestamp) {
+    this.timestamp = timestamp;
+  }
+
+  /**
+   * Get the event type
+   * 
+   * @return the event type
+   */
+  @XmlElement(name = "eventtype")
+  public String getEventType() {
+    return eventType;
+  }
+
+  /**
+   * Set the event type
+   * 
+   * @param eventType
+   *          the event type
+   */
+  public void setEventType(String eventType) {
+    this.eventType = eventType;
+  }
+
+  /**
+   * Get the information of the event
+   * 
+   * @return the information of the event
+   */
+  @XmlElement(name = "eventinfo")
+  public Map<String, Object> getEventInfo() {
+    return eventInfo;
+  }
+
+  /**
+   * Add one piece of the information of the event to the existing information
+   * map
+   * 
+   * @param key
+   *          the information key
+   * @param value
+   *          the information value
+   */
+  public void addEventInfo(String key, Object value) {
+    this.eventInfo.put(key, value);
+  }
+
+  /**
+   * Add a map of the information of the event to the existing information map
+   * 
+   * @param eventInfo
+   *          a map of the information of the event
+   */
+  public void addEventInfo(Map<String, Object> eventInfo) {
+    this.eventInfo.putAll(eventInfo);
+  }
+
+  /**
+   * Set the information map to the given map of the information of the event
+   * 
+   * @param eventInfo
+   *          a map of the information of the event
+   */
+  public void setEventInfo(Map<String, Object> eventInfo) {
+    this.eventInfo = eventInfo;
+  }
+
+}

+ 189 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/ATSEvents.java

@@ -0,0 +1,189 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records.apptimeline;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+
+/**
+ * The class that hosts a list of events, which are categorized according to
+ * their related entities.
+ */
+@XmlRootElement(name = "events")
+@XmlAccessorType(XmlAccessType.NONE)
+@Public
+@Unstable
+public class ATSEvents {
+
+  private List<ATSEventsOfOneEntity> allEvents =
+      new ArrayList<ATSEventsOfOneEntity>();
+
+  public ATSEvents() {
+
+  }
+
+  /**
+   * Get a list of {@link ATSEventsOfOneEntity} instances
+   * 
+   * @return a list of {@link ATSEventsOfOneEntity} instances
+   */
+  @XmlElement(name = "events")
+  public List<ATSEventsOfOneEntity> getAllEvents() {
+    return allEvents;
+  }
+
+  /**
+   * Add a single {@link ATSEventsOfOneEntity} instance into the existing list
+   * 
+   * @param eventsOfOneEntity
+   *          a single {@link ATSEventsOfOneEntity} instance
+   */
+  public void addEvent(ATSEventsOfOneEntity eventsOfOneEntity) {
+    allEvents.add(eventsOfOneEntity);
+  }
+
+  /**
+   * Add a list of {@link ATSEventsOfOneEntity} instances into the existing list
+   * 
+   * @param allEvents
+   *          a list of {@link ATSEventsOfOneEntity} instances
+   */
+  public void addEvents(List<ATSEventsOfOneEntity> allEvents) {
+    this.allEvents.addAll(allEvents);
+  }
+
+  /**
+   * Set the list to the given list of {@link ATSEventsOfOneEntity} instances
+   * 
+   * @param allEvents
+   *          a list of {@link ATSEventsOfOneEntity} instances
+   */
+  public void setEvents(List<ATSEventsOfOneEntity> allEvents) {
+    this.allEvents.clear();
+    this.allEvents.addAll(allEvents);
+  }
+
+  /**
+   * The class that hosts a list of events that are only related to one entity.
+   */
+  @XmlRootElement(name = "events")
+  @XmlAccessorType(XmlAccessType.NONE)
+  @Public
+  @Unstable
+  public static class ATSEventsOfOneEntity {
+
+    private String entityId;
+    private String entityType;
+    private List<ATSEvent> events = new ArrayList<ATSEvent>();
+
+    public ATSEventsOfOneEntity() {
+
+    }
+
+    /**
+     * Get the entity Id
+     * 
+     * @return the entity Id
+     */
+    @XmlElement(name = "entity")
+    public String getEntityId() {
+      return entityId;
+    }
+
+    /**
+     * Set the entity Id
+     * 
+     * @param entityId
+     *          the entity Id
+     */
+    public void setEntityId(String entityId) {
+      this.entityId = entityId;
+    }
+
+    /**
+     * Get the entity type
+     * 
+     * @return the entity type
+     */
+    @XmlElement(name = "entitytype")
+    public String getEntityType() {
+      return entityType;
+    }
+
+    /**
+     * Set the entity type
+     * 
+     * @param entityType
+     *          the entity type
+     */
+    public void setEntityType(String entityType) {
+      this.entityType = entityType;
+    }
+
+    /**
+     * Get a list of events
+     * 
+     * @return a list of events
+     */
+    @XmlElement(name = "events")
+    public List<ATSEvent> getEvents() {
+      return events;
+    }
+
+    /**
+     * Add a single event to the existing event list
+     * 
+     * @param event
+     *          a single event
+     */
+    public void addEvent(ATSEvent event) {
+      events.add(event);
+    }
+
+    /**
+     * Add a list of events to the existing event list
+     * 
+     * @param events
+     *          a list of events
+     */
+    public void addEvents(List<ATSEvent> events) {
+      this.events.addAll(events);
+    }
+
+    /**
+     * Set the event list to the given list of events
+     * 
+     * @param events
+     *          a list of events
+     */
+    public void setEvents(List<ATSEvent> events) {
+      this.events = events;
+    }
+
+  }
+
+}
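
A minimal usage sketch, not part of the patch itself, showing how the records above are assembled (the same pattern the unit test later in this diff exercises); the entity id and event types are made-up values, and the JAXB annotations on the accessors are what drive the wire format when these records are rendered by the web layer.

    import org.apache.hadoop.yarn.api.records.apptimeline.ATSEvent;
    import org.apache.hadoop.yarn.api.records.apptimeline.ATSEvents;

    public class ATSEventsSketch {
      public static void main(String[] args) {
        // Events for one (made-up) entity.
        ATSEvents.ATSEventsOfOneEntity perEntity =
            new ATSEvents.ATSEventsOfOneEntity();
        perEntity.setEntityId("application_1390000000000_0001");
        perEntity.setEntityType("YARN_APPLICATION");

        ATSEvent submitted = new ATSEvent();
        submitted.setEventType("APP_SUBMITTED");
        submitted.setTimestamp(System.currentTimeMillis());
        perEntity.addEvent(submitted);

        // The outer container groups per-entity event lists.
        ATSEvents events = new ATSEvents();
        events.addEvent(perEntity);

        System.out.println(events.getAllEvents().size() + " entity, "
            + events.getAllEvents().get(0).getEvents().size() + " event");
      }
    }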

+ 21 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/apptimeline/package-info.java

@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+@InterfaceAudience.Public
+package org.apache.hadoop.yarn.api.records.apptimeline;
+import org.apache.hadoop.classification.InterfaceAudience;
+

+ 113 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java

@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records.apptimeline;
+
+import java.util.Arrays;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+public class TestApplicationTimelineRecords {
+
+  @Test
+  public void testATSEntities() {
+    ATSEntities entities = new ATSEntities();
+    for (int j = 0; j < 2; ++j) {
+      ATSEntity entity = new ATSEntity();
+      entity.setEntityId("entity id " + j);
+      entity.setEntityType("entity type " + j);
+      entity.setStartTime(System.currentTimeMillis());
+      for (int i = 0; i < 2; ++i) {
+        ATSEvent event = new ATSEvent();
+        event.setTimestamp(System.currentTimeMillis());
+        event.setEventType("event type " + i);
+        event.addEventInfo("key1", "val1");
+        event.addEventInfo("key2", "val2");
+        entity.addEvent(event);
+      }
+      entity.addRelatedEntity(
+          "test ref type 1", Arrays.asList((Object) "test ref id 1"));
+      entity.addRelatedEntity(
+          "test ref type 2", Arrays.asList((Object) "test ref id 2"));
+      entity.addPrimaryFilter("pkey1", "pval1");
+      entity.addPrimaryFilter("pkey2", "pval2");
+      entity.addOtherInfo("okey1", "oval1");
+      entity.addOtherInfo("okey2", "oval2");
+      entities.addEntity(entity);
+    }
+    Assert.assertEquals(2, entities.getEntities().size());
+    ATSEntity entity1 = entities.getEntities().get(0);
+    Assert.assertEquals("entity id 0", entity1.getEntityId());
+    Assert.assertEquals("entity type 0", entity1.getEntityType());
+    Assert.assertEquals(2, entity1.getRelatedEntities().size());
+    Assert.assertEquals(2, entity1.getEvents().size());
+    Assert.assertEquals(2, entity1.getPrimaryFilters().size());
+    Assert.assertEquals(2, entity1.getOtherInfo().size());
+    ATSEntity entity2 = entities.getEntities().get(1);
+    Assert.assertEquals("entity id 1", entity2.getEntityId());
+    Assert.assertEquals("entity type 1", entity2.getEntityType());
+    Assert.assertEquals(2, entity2.getRelatedEntities().size());
+    Assert.assertEquals(2, entity2.getEvents().size());
+    Assert.assertEquals(2, entity2.getPrimaryFilters().size());
+    Assert.assertEquals(2, entity2.getOtherInfo().size());
+  }
+
+  @Test
+  public void testATSEvents() {
+    ATSEvents events = new ATSEvents();
+    for (int j = 0; j < 2; ++j) {
+      ATSEvents.ATSEventsOfOneEntity partEvents =
+          new ATSEvents.ATSEventsOfOneEntity();
+      partEvents.setEntityId("entity id " + j);
+      partEvents.setEntityType("entity type " + j);
+      for (int i = 0; i < 2; ++i) {
+        ATSEvent event = new ATSEvent();
+        event.setTimestamp(System.currentTimeMillis());
+        event.setEventType("event type " + i);
+        event.addEventInfo("key1", "val1");
+        event.addEventInfo("key2", "val2");
+        partEvents.addEvent(event);
+      }
+      events.addEvent(partEvents);
+    }
+    Assert.assertEquals(2, events.getAllEvents().size());
+    ATSEvents.ATSEventsOfOneEntity partEvents1 = events.getAllEvents().get(0);
+    Assert.assertEquals("entity id 0", partEvents1.getEntityId());
+    Assert.assertEquals("entity type 0", partEvents1.getEntityType());
+    Assert.assertEquals(2, partEvents1.getEvents().size());
+    ATSEvent event11 = partEvents1.getEvents().get(0);
+    Assert.assertEquals("event type 0", event11.getEventType());
+    Assert.assertEquals(2, event11.getEventInfo().size());
+    ATSEvent event12 = partEvents1.getEvents().get(1);
+    Assert.assertEquals("event type 1", event12.getEventType());
+    Assert.assertEquals(2, event12.getEventInfo().size());
+    ATSEvents.ATSEventsOfOneEntity partEvents2 = events.getAllEvents().get(1);
+    Assert.assertEquals("entity id 1", partEvents2.getEntityId());
+    Assert.assertEquals("entity type 1", partEvents2.getEntityType());
+    Assert.assertEquals(2, partEvents2.getEvents().size());
+    ATSEvent event21 = partEvents2.getEvents().get(0);
+    Assert.assertEquals("event type 0", event21.getEventType());
+    Assert.assertEquals(2, event21.getEventInfo().size());
+    ATSEvent event22 = partEvents2.getEvents().get(1);
+    Assert.assertEquals("event type 1", event22.getEventType());
+    Assert.assertEquals(2, event22.getEventInfo().size());
+  }
+
+}

+ 70 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java

@@ -94,6 +94,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstant
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMoveEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -104,6 +106,9 @@ import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.Records;
 
+import com.google.common.util.concurrent.Futures;
+import com.google.common.util.concurrent.SettableFuture;
+
 
 /**
  * The client interface to the Resource Manager. This module handles all the rpc
@@ -686,10 +691,74 @@ public class ClientRMService extends AbstractService implements
     }
   }
   
+  @SuppressWarnings("unchecked")
   @Override
   public MoveApplicationAcrossQueuesResponse moveApplicationAcrossQueues(
       MoveApplicationAcrossQueuesRequest request) throws YarnException {
-    throw new UnsupportedOperationException("Move not yet supported");
+    ApplicationId applicationId = request.getApplicationId();
+
+    UserGroupInformation callerUGI;
+    try {
+      callerUGI = UserGroupInformation.getCurrentUser();
+    } catch (IOException ie) {
+      LOG.info("Error getting UGI ", ie);
+      RMAuditLogger.logFailure("UNKNOWN", AuditConstants.MOVE_APP_REQUEST,
+          "UNKNOWN", "ClientRMService" , "Error getting UGI",
+          applicationId);
+      throw RPCUtil.getRemoteException(ie);
+    }
+
+    RMApp application = this.rmContext.getRMApps().get(applicationId);
+    if (application == null) {
+      RMAuditLogger.logFailure(callerUGI.getUserName(),
+          AuditConstants.MOVE_APP_REQUEST, "UNKNOWN", "ClientRMService",
+          "Trying to move an absent application", applicationId);
+      throw new ApplicationNotFoundException("Trying to move an absent"
+          + " application " + applicationId);
+    }
+
+    if (!checkAccess(callerUGI, application.getUser(),
+        ApplicationAccessType.MODIFY_APP, application)) {
+      RMAuditLogger.logFailure(callerUGI.getShortUserName(),
+          AuditConstants.MOVE_APP_REQUEST,
+          "User doesn't have permissions to "
+              + ApplicationAccessType.MODIFY_APP.toString(), "ClientRMService",
+          AuditConstants.UNAUTHORIZED_USER, applicationId);
+      throw RPCUtil.getRemoteException(new AccessControlException("User "
+          + callerUGI.getShortUserName() + " cannot perform operation "
+          + ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId));
+    }
+    
+    // Moves are only allowed while the app is in a state in which it is
+    // tracked by the scheduler; reject the move otherwise
+    if (EnumSet.of(RMAppState.NEW, RMAppState.NEW_SAVING, RMAppState.FAILED,
+        RMAppState.FINAL_SAVING, RMAppState.FINISHING, RMAppState.FINISHED,
+        RMAppState.KILLED, RMAppState.KILLING)
+        .contains(application.getState())) {
+      String msg = "App in " + application.getState() + " state cannot be moved.";
+      RMAuditLogger.logFailure(callerUGI.getShortUserName(),
+          AuditConstants.MOVE_APP_REQUEST, "UNKNOWN", "ClientRMService", msg);
+      throw new YarnException(msg);
+    }
+
+    SettableFuture<Object> future = SettableFuture.create();
+    this.rmContext.getDispatcher().getEventHandler().handle(
+        new RMAppMoveEvent(applicationId, request.getTargetQueue(), future));
+    
+    try {
+      Futures.get(future, YarnException.class);
+    } catch (YarnException ex) {
+      RMAuditLogger.logFailure(callerUGI.getShortUserName(),
+          AuditConstants.MOVE_APP_REQUEST, "UNKNOWN", "ClientRMService",
+          ex.getMessage());
+      throw ex;
+    }
+
+    RMAuditLogger.logSuccess(callerUGI.getShortUserName(), 
+        AuditConstants.MOVE_APP_REQUEST, "ClientRMService" , applicationId);
+    MoveApplicationAcrossQueuesResponse response = recordFactory
+        .newRecordInstance(MoveApplicationAcrossQueuesResponse.class);
+    return response;
   }
 
   private String getRenewerForToken(Token<RMDelegationTokenIdentifier> token)
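
The moveApplicationAcrossQueues() implementation above relies on a small handshake: the RPC handler creates a SettableFuture, hands it to the dispatcher inside an RMAppMoveEvent, and blocks on it until RMAppMoveTransition (added to RMAppImpl below) reports success with set(null) or failure with setException(). The following is a self-contained sketch of that pattern only, using plain java.lang.Exception in place of YarnException and a bare Thread in place of the async dispatcher; both substitutions are for illustration and are not YARN code.

    import com.google.common.util.concurrent.Futures;
    import com.google.common.util.concurrent.SettableFuture;

    public class MoveHandshakeSketch {
      public static void main(String[] args) {
        final boolean failMove = args.length > 0;   // pass any argument to simulate a rejected move
        final SettableFuture<Object> result = SettableFuture.create();

        // Stand-in for the dispatcher thread that runs RMAppMoveTransition.
        new Thread(new Runnable() {
          @Override
          public void run() {
            if (failMove) {
              result.setException(new Exception("move rejected by scheduler"));
            } else {
              result.set(null);                     // success is signalled with a null result
            }
          }
        }).start();

        // Stand-in for the RPC handler: block until the transition reports an outcome.
        try {
          Futures.get(result, Exception.class);     // rethrows the failure, if any
          System.out.println("move completed");
        } catch (Exception e) {
          System.out.println("move failed: " + e.getMessage());
        }
      }
    }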

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java

@@ -45,6 +45,7 @@ public class RMAuditLogger {
 
     public static final String KILL_APP_REQUEST = "Kill Application Request";
     public static final String SUBMIT_APP_REQUEST = "Submit Application Request";
+    public static final String MOVE_APP_REQUEST = "Move Application Request";
     public static final String FINISH_SUCCESS_APP = "Application Finished - Succeeded";
     public static final String FINISH_FAILED_APP = "Application Finished - Failed";
     public static final String FINISH_KILLED_APP = "Application Finished - Killed";

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java

@@ -23,6 +23,7 @@ public enum RMAppEventType {
   START,
   RECOVER,
   KILL,
+  MOVE, // Move app to a new queue
 
   // Source: Scheduler and RMAppManager
   APP_REJECTED,

+ 32 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java

@@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
@@ -166,6 +167,8 @@ public class RMAppImpl implements RMApp, Recoverable {
      // Transitions from SUBMITTED state
     .addTransition(RMAppState.SUBMITTED, RMAppState.SUBMITTED,
         RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
+    .addTransition(RMAppState.SUBMITTED, RMAppState.SUBMITTED,
+        RMAppEventType.MOVE, new RMAppMoveTransition())
     .addTransition(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING,
         RMAppEventType.APP_REJECTED,
         new FinalSavingTransition(
@@ -181,6 +184,8 @@ public class RMAppImpl implements RMApp, Recoverable {
      // Transitions from ACCEPTED state
     .addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED,
         RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
+    .addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED,
+        RMAppEventType.MOVE, new RMAppMoveTransition())
     .addTransition(RMAppState.ACCEPTED, RMAppState.RUNNING,
         RMAppEventType.ATTEMPT_REGISTERED)
     .addTransition(RMAppState.ACCEPTED,
@@ -204,6 +209,8 @@ public class RMAppImpl implements RMApp, Recoverable {
      // Transitions from RUNNING state
     .addTransition(RMAppState.RUNNING, RMAppState.RUNNING,
         RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
+    .addTransition(RMAppState.RUNNING, RMAppState.RUNNING,
+        RMAppEventType.MOVE, new RMAppMoveTransition())
     .addTransition(RMAppState.RUNNING, RMAppState.FINAL_SAVING,
         RMAppEventType.ATTEMPT_UNREGISTERED,
         new FinalSavingTransition(
@@ -692,6 +699,31 @@ public class RMAppImpl implements RMApp, Recoverable {
     };
   }
 
+  /**
+   * Move an app to a new queue.
+   * This transition must set the result on the Future in the RMAppMoveEvent,
+   * either as an exception for failure or null for success, or the client will
+   * be left waiting forever.
+   */
+  @SuppressWarnings("unchecked")
+  private static final class RMAppMoveTransition extends RMAppTransition {
+    public void transition(RMAppImpl app, RMAppEvent event) {
+      RMAppMoveEvent moveEvent = (RMAppMoveEvent) event;
+      try {
+        app.queue = app.scheduler.moveApplication(app.applicationId,
+            moveEvent.getTargetQueue());
+      } catch (YarnException ex) {
+        moveEvent.getResult().setException(ex);
+        return;
+      }
+      
+      // TODO: Write out change to state store (YARN-1558)
+      
+      moveEvent.getResult().set(null);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
   private static final class RMAppRecoveredTransition implements
       MultipleArcTransition<RMAppImpl, RMAppEvent, RMAppState> {
 

+ 44 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMoveEvent.java

@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+
+import com.google.common.util.concurrent.SettableFuture;
+
+public class RMAppMoveEvent extends RMAppEvent {
+  private String targetQueue;
+  private SettableFuture<Object> result;
+  
+  public RMAppMoveEvent(ApplicationId id, String newQueue,
+      SettableFuture<Object> resultFuture) {
+    super(id, RMAppEventType.MOVE);
+    this.targetQueue = newQueue;
+    this.result = resultFuture;
+  }
+  
+  public String getTargetQueue() {
+    return targetQueue;
+  }
+  
+  public SettableFuture<Object> getResult() {
+    return result;
+  }
+
+}

+ 9 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java

@@ -27,11 +27,12 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 
-public class AbstractYarnScheduler {
+public abstract class AbstractYarnScheduler implements ResourceScheduler {
 
   protected RMContext rmContext;
   protected Map<ApplicationId, SchedulerApplication> applications;
@@ -61,4 +62,11 @@ public class AbstractYarnScheduler {
   public Map<ApplicationId, SchedulerApplication> getSchedulerApplications() {
     return applications;
   }
+  
+  @Override
+  public String moveApplication(ApplicationId appId, String newQueue)
+      throws YarnException {
+    throw new YarnException(getClass().getSimpleName()
+        + " does not support moving apps between queues");
+  }
 }

+ 14 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java

@@ -28,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeId;
@@ -38,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
 
 /**
@@ -180,4 +182,16 @@ public interface YarnScheduler extends EventHandler<SchedulerEvent> {
   @LimitedPrivate("yarn")
   @Unstable
   public RMContainer getRMContainer(ContainerId containerId);
+  
+  /**
+   * Moves the given application to the given queue.
+   * @param appId the id of the application to move
+   * @param newQueue the name of the queue to move the application to
+   * @return the name of the queue the application was placed into
+   * @throws YarnException if the move cannot be carried out
+   */
+  @LimitedPrivate("yarn")
+  @Evolving
+  public String moveApplication(ApplicationId appId, String newQueue)
+      throws YarnException;
 }
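
The contract added here is simple: a scheduler either returns the name of the queue the application was actually placed into, or throws to indicate that it cannot perform the move, which is what the AbstractYarnScheduler default above does and what FifoSchedulerWithMove in the test below overrides. A toy, self-contained rendering of that contract follows; the interface and class names are hypothetical and are not YARN types.

    public class MoveContractSketch {
      interface QueueMover {
        String moveApplication(String appId, String newQueue) throws Exception;
      }

      // Mirrors the AbstractYarnScheduler default: moves are unsupported.
      static class NoMoveScheduler implements QueueMover {
        public String moveApplication(String appId, String newQueue) throws Exception {
          throw new Exception(getClass().getSimpleName()
              + " does not support moving apps between queues");
        }
      }

      // Mirrors a scheduler that accepts the move and reports the final queue.
      static class MovingScheduler implements QueueMover {
        public String moveApplication(String appId, String newQueue) {
          return newQueue;
        }
      }

      public static void main(String[] args) throws Exception {
        System.out.println(new MovingScheduler()
            .moveApplication("application_0001", "root.analytics"));
        try {
          new NoMoveScheduler().moveApplication("application_0001", "root.analytics");
        } catch (Exception e) {
          System.out.println("rejected: " + e.getMessage());
        }
      }
    }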

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

@@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
@@ -75,7 +76,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnSched
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
@@ -121,8 +121,7 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Unstable
 @SuppressWarnings("unchecked")
-public class FairScheduler extends AbstractYarnScheduler implements
-    ResourceScheduler {
+public class FairScheduler extends AbstractYarnScheduler {
   private boolean initialized;
   private FairSchedulerConfiguration conf;
   private Resource minimumAllocation;

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

@@ -77,7 +77,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
@@ -106,7 +105,7 @@ import com.google.common.annotations.VisibleForTesting;
 @Evolving
 @SuppressWarnings("unchecked")
 public class FifoScheduler extends AbstractYarnScheduler implements
-    ResourceScheduler, Configurable {
+    Configurable {
 
   private static final Log LOG = LogFactory.getLog(FifoScheduler.class);
 

+ 1 - 37
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationmasterservice/TestApplicationMasterService.java → hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java

@@ -16,28 +16,21 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.yarn.server.resourcemanager.applicationmasterservice;
+package org.apache.hadoop.yarn.server.resourcemanager;
 
 import junit.framework.Assert;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException;
-import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
-import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
-import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
-import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
-import org.apache.hadoop.yarn.server.resourcemanager.TestFifoScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.junit.BeforeClass;
@@ -152,33 +145,4 @@ public class TestApplicationMasterService {
       }
     }
   }
-  
-  @Test (timeout = 60000)
-  public void testNotifyAMOfPlacedQueue() throws Exception {
-    // By default, FairScheduler assigns queue by user name
-    conf.setClass(YarnConfiguration.RM_SCHEDULER, FairScheduler.class,
-        ResourceScheduler.class);
-    MockRM rm = new MockRM(conf);
-    try {
-      rm.start();
-
-      // Register node1
-      MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
-
-      // Submit an application
-      RMApp app1 = rm.submitApp(1024, "somename", "user1");
-
-      // kick the scheduling
-      nm1.nodeHeartbeat(true);
-      RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
-      MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
-      
-      RegisterApplicationMasterResponse response = am1.registerAppAttempt();
-      Assert.assertEquals("root.user1", response.getQueue());
-    } finally {
-      if (rm != null) {
-        rm.stop();
-      }
-    }
-  }
 }

+ 15 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java

@@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
@@ -232,6 +233,20 @@ public class TestClientRMService {
               "application " + request.getApplicationId());
     }
   }
+  
+  @Test (expected = ApplicationNotFoundException.class)
+  public void testMoveAbsentApplication() throws YarnException {
+    RMContext rmContext = mock(RMContext.class);
+    when(rmContext.getRMApps()).thenReturn(
+        new ConcurrentHashMap<ApplicationId, RMApp>());
+    ClientRMService rmService = new ClientRMService(rmContext, null, null,
+        null, null, null);
+    ApplicationId applicationId =
+        BuilderUtils.newApplicationId(System.currentTimeMillis(), 0);
+    MoveApplicationAcrossQueuesRequest request =
+        MoveApplicationAcrossQueuesRequest.newInstance(applicationId, "newqueue");
+    rmService.moveApplicationAcrossQueues(request);
+  }
 
   @Test
   public void testGetQueueInfo() throws Exception {

+ 180 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestMoveApplication.java

@@ -0,0 +1,180 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.resourcemanager;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+import java.security.AccessControlException;
+import java.security.PrivilegedExceptionAction;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestMoveApplication {
+  private ResourceManager resourceManager = null;
+  private static boolean failMove;
+  
+  @Before
+  public void setUp() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoSchedulerWithMove.class,
+        FifoSchedulerWithMove.class);
+    conf.set(YarnConfiguration.YARN_ADMIN_ACL, " ");
+    conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
+    resourceManager = new ResourceManager();
+    resourceManager.init(conf);
+    resourceManager.getRMContainerTokenSecretManager().rollMasterKey();
+    resourceManager.getRMNMTokenSecretManager().rollMasterKey();
+    resourceManager.start();
+    failMove = false;
+  }
+  
+  @After
+  public void tearDown() {
+    resourceManager.stop();
+  }
+  
+  @Test
+  public void testMoveRejectedByScheduler() throws Exception {
+    failMove = true;
+    
+    // Submit application
+    Application application = new Application("user1", resourceManager);
+    application.submit();
+
+    ClientRMService clientRMService = resourceManager.getClientRMService();
+    try {
+      // failMove is set, so FifoSchedulerWithMove will reject the move
+      clientRMService.moveApplicationAcrossQueues(
+          MoveApplicationAcrossQueuesRequest.newInstance(
+              application.getApplicationId(), "newqueue"));
+      fail("Should have hit exception");
+    } catch (YarnException ex) {
+      assertEquals("Move not supported", ex.getCause().getMessage());
+    }
+  }
+  
+  @Test (timeout = 10000)
+  public void testMoveTooLate() throws Exception {
+    // Submit application
+    Application application = new Application("user1", resourceManager);
+    ApplicationId appId = application.getApplicationId();
+    application.submit();
+    
+    ClientRMService clientRMService = resourceManager.getClientRMService();
+    // Kill the application
+    clientRMService.forceKillApplication(
+        KillApplicationRequest.newInstance(appId));
+    RMApp rmApp = resourceManager.getRMContext().getRMApps().get(appId);
+    // wait until it's dead
+    while (rmApp.getState() != RMAppState.KILLED) {
+      Thread.sleep(100);
+    }
+    
+    try {
+      clientRMService.moveApplicationAcrossQueues(
+          MoveApplicationAcrossQueuesRequest.newInstance(appId, "newqueue"));
+      fail("Should have hit exception");
+    } catch (YarnException ex) {
+      assertEquals(YarnException.class,
+          ex.getClass());
+      assertEquals("App in KILLED state cannot be moved.", ex.getMessage());
+    }
+  }
+  
+  @Test (timeout = 5000)
+  public void testMoveSuccessful() throws Exception {
+    // Submit application
+    Application application = new Application("user1", resourceManager);
+    ApplicationId appId = application.getApplicationId();
+    application.submit();
+    
+    // Wait for app to be accepted
+    RMApp app = resourceManager.rmContext.getRMApps().get(appId);
+    while (app.getState() != RMAppState.ACCEPTED) {
+      Thread.sleep(100);
+    }
+
+    ClientRMService clientRMService = resourceManager.getClientRMService();
+    // FifoSchedulerWithMove accepts the move, so this call should succeed
+    clientRMService.moveApplicationAcrossQueues(
+        MoveApplicationAcrossQueuesRequest.newInstance(appId, "newqueue"));
+    
+    RMApp rmApp = resourceManager.getRMContext().getRMApps().get(appId);
+    assertEquals("newqueue", rmApp.getQueue());
+  }
+  
+  @Test
+  public void testMoveRejectedByPermissions() throws Exception {
+    failMove = true;
+    
+    // Submit application
+    final Application application = new Application("user1", resourceManager);
+    application.submit();
+
+    final ClientRMService clientRMService = resourceManager.getClientRMService();
+    try {
+      UserGroupInformation.createRemoteUser("otheruser").doAs(
+          new PrivilegedExceptionAction<MoveApplicationAcrossQueuesResponse>() {
+            @Override
+            public MoveApplicationAcrossQueuesResponse run() throws Exception {
+              return clientRMService.moveApplicationAcrossQueues(
+                  MoveApplicationAcrossQueuesRequest.newInstance(
+                      application.getApplicationId(), "newqueue"));
+            }
+            
+          });
+      fail("Should have hit exception");
+    } catch (Exception ex) {
+      assertEquals(AccessControlException.class, ex.getCause().getCause().getClass());
+    }
+  }
+  
+  public static class FifoSchedulerWithMove extends FifoScheduler {
+    @Override
+    public String moveApplication(ApplicationId appId, String newQueue)
+        throws YarnException {
+      if (failMove) {
+        throw new YarnException("Move not supported");
+      }
+      return newQueue;
+    }
+
+    @Override
+    public synchronized boolean checkAccess(UserGroupInformation callerUGI,
+        QueueACL acl, String queueName) {
+      return acl != QueueACL.ADMINISTER_QUEUE;
+    }
+  }
+}

Some files were not shown because too many files changed in this diff