
Merge branch 'trunk' into HDFS-7240

Anu Engineer 7 years ago
parent
commit
d11161bb68
100 changed files with 4868 additions and 212 deletions
  1. 1 0
      LICENSE.txt
  2. 4 3
      dev-support/docker/Dockerfile
  3. 42 0
      hadoop-assemblies/src/main/resources/assemblies/hadoop-resourceestimator.xml
  4. 11 0
      hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml
  5. 11 0
      hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_2.8.2.xml
  6. 2 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java
  7. 3 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java
  8. 21 12
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java
  9. 42 13
      hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGES.2.8.2.md
  10. 4 1
      hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/RELEASENOTES.2.8.2.md
  11. 1 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestReconfiguration.java
  12. 1 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestKDiag.java
  13. 2 2
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolUtils.java
  14. 36 25
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java
  15. 14 2
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
  16. 11 0
      hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_2.8.2.xml
  17. 7 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  18. 1 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolServerSideUtils.java
  19. 14 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java
  20. 0 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  21. 18 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
  22. 25 13
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java
  23. 10 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java
  24. 55 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
  25. 7 23
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/metrics/TopMetrics.java
  26. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
  27. 11 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  28. 3 8
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestTopMetrics.java
  29. 80 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapRootDescendantDiff.java
  30. 503 38
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java
  31. 11 0
      hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_2.8.2.xml
  32. 11 0
      hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_2.8.2.xml
  33. 11 0
      hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_2.8.2.xml
  34. 5 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
  35. 8 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java
  36. 5 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java
  37. 4 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java
  38. 4 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/security/MRDelegationTokenRenewer.java
  39. 4 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/security/client/ClientHSTokenSelector.java
  40. 3 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
  41. 4 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalModeWithNewApis.java
  42. 4 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java
  43. 1 1
      hadoop-project-dist/pom.xml
  44. 5 0
      hadoop-project/pom.xml
  45. 2 0
      hadoop-project/src/site/site.xml
  46. 19 0
      hadoop-tools/hadoop-resourceestimator/README.md
  47. 174 0
      hadoop-tools/hadoop-resourceestimator/pom.xml
  48. 50 0
      hadoop-tools/hadoop-resourceestimator/src/config/checkstyle.xml
  49. 52 0
      hadoop-tools/hadoop-resourceestimator/src/main/bin/estimator.cmd
  50. 71 0
      hadoop-tools/hadoop-resourceestimator/src/main/bin/estimator.sh
  51. 37 0
      hadoop-tools/hadoop-resourceestimator/src/main/bin/start-estimator.cmd
  52. 42 0
      hadoop-tools/hadoop-resourceestimator/src/main/bin/start-estimator.sh
  53. 37 0
      hadoop-tools/hadoop-resourceestimator/src/main/bin/stop-estimator.cmd
  54. 42 0
      hadoop-tools/hadoop-resourceestimator/src/main/bin/stop-estimator.sh
  55. 85 0
      hadoop-tools/hadoop-resourceestimator/src/main/conf/resourceestimator-config.xml
  56. 2 0
      hadoop-tools/hadoop-resourceestimator/src/main/data/resourceEstimatorService.txt
  57. 95 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/api/RecurrenceId.java
  58. 211 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/api/ResourceSkyline.java
  59. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/api/package-info.java
  60. 125 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/config/ResourceEstimatorConfiguration.java
  61. 81 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/config/ResourceEstimatorUtil.java
  62. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/config/package-info.java
  63. 35 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/exception/ResourceEstimatorException.java
  64. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/exception/package-info.java
  65. 77 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/serialization/RLESparseResourceAllocationSerDe.java
  66. 61 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/serialization/ResourceSerDe.java
  67. 24 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/serialization/package-info.java
  68. 146 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/ResourceEstimatorServer.java
  69. 238 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/ResourceEstimatorService.java
  70. 45 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/ShutdownHook.java
  71. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/package-info.java
  72. 99 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/HistorySkylineStore.java
  73. 60 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/PredictionSkylineStore.java
  74. 30 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/SkylineStore.java
  75. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/package-info.java
  76. 33 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/DuplicateRecurrenceIdException.java
  77. 33 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/EmptyResourceSkylineException.java
  78. 32 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullPipelineIdException.java
  79. 33 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullRLESparseResourceAllocationException.java
  80. 32 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullRecurrenceIdException.java
  81. 32 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullResourceSkylineException.java
  82. 33 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/RecurrenceIdNotFoundException.java
  83. 33 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/SkylineStoreException.java
  84. 24 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/package-info.java
  85. 256 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/impl/InMemoryStore.java
  86. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/impl/package-info.java
  87. 118 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/validator/SkylineStoreValidator.java
  88. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/validator/package-info.java
  89. 76 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/api/Solver.java
  90. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/api/package-info.java
  91. 34 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/InvalidInputException.java
  92. 34 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/InvalidSolverException.java
  93. 34 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/SolverException.java
  94. 24 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/package-info.java
  95. 94 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/impl/BaseSolver.java
  96. 340 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/impl/LpSolver.java
  97. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/impl/package-info.java
  98. 219 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/preprocess/SolverPreprocessor.java
  99. 23 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/preprocess/package-info.java
  100. 163 0
      hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/translator/api/JobMetaData.java

+ 1 - 0
LICENSE.txt

@@ -766,6 +766,7 @@ hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery
 Apache HBase - Server which contains JQuery minified javascript library version 1.8.3
 Microsoft JDBC Driver for SQLServer - version 6.2.1.jre7
+oj! Algorithms - version 43.0
 --------------------------------------------------------------------------------
 
 Copyright 2005, 2012, 2013 jQuery Foundation and other contributors, https://jquery.org/

+ 4 - 3
dev-support/docker/Dockerfile

@@ -134,8 +134,9 @@ ENV MAVEN_HOME /usr
 
 ######
 # Install findbugs 3.0.1 (3.0.1 ships with Xenial)
+# Ant is needed for findbugs
 ######
-RUN apt-get -q update && apt-get -q install -y findbugs
+RUN apt-get -q update && apt-get -q install -y findbugs ant
 ENV FINDBUGS_HOME /usr
 
 ####
@@ -182,9 +183,9 @@ ENV MAVEN_OPTS -Xms256m -Xmx1g
 ###
 
 ####
-# Install svn, Ant, & Forrest (for Apache Hadoop website)
+# Install svn & Forrest (for Apache Hadoop website)
 ###
-RUN apt-get -q update && apt-get -q install -y ant subversion
+RUN apt-get -q update && apt-get -q install -y subversion
 
 RUN mkdir -p /opt/apache-forrest && \
     curl -L -s -S \

+ 42 - 0
hadoop-assemblies/src/main/resources/assemblies/hadoop-resourceestimator.xml

@@ -0,0 +1,42 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
+  <id>hadoop-resourceestimator</id>
+  <formats>
+    <format>dir</format>
+  </formats>
+  <includeBaseDirectory>false</includeBaseDirectory>
+
+  <fileSets>
+    <fileSet>
+      <directory>${basedir}/src/main/bin</directory>
+      <outputDirectory>resourceestimator/bin</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet>
+    <fileSet>
+        <directory>${basedir}/src/main/conf</directory>
+        <outputDirectory>resourceestimator/conf</outputDirectory>
+    </fileSet>
+    <fileSet>
+        <directory>${basedir}/src/main/data</directory>
+        <outputDirectory>resourceestimator/data</outputDirectory>
+    </fileSet>
+  </fileSets>
+ </assembly>

+ 11 - 0
hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml

@@ -174,6 +174,17 @@
       <directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
       <outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
     </fileSet>
+    <fileSet>
+      <directory>../hadoop-resourceestimator/target</directory>
+      <outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
+      <includes>
+        <include>*-sources.jar</include>
+      </includes>
+    </fileSet>
+    <fileSet>
+      <directory>../hadoop-resourceestimator/target/hadoop-resourceestimator-${project.version}/resourceestimator</directory>
+      <outputDirectory>/share/hadoop/${hadoop.component}/resourceestimator</outputDirectory>
+    </fileSet>
     <fileSet>
       <directory>../hadoop-aws/src/main/bin</directory>
       <outputDirectory>/bin</outputDirectory>

The file diff has been suppressed because it is too large
+ 11 - 0
hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_2.8.2.xml


+ 2 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java

@@ -19,7 +19,6 @@
 package org.apache.hadoop.conf;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 import org.apache.hadoop.util.Time;
@@ -31,6 +30,7 @@ import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Map;
+import java.util.Optional;
 
 /**
  * Utility base class for implementing the Reconfigurable interface.
@@ -148,7 +148,7 @@ public abstract class ReconfigurableBase
         } catch (ReconfigurationException e) {
           errorMessage = e.getCause().getMessage();
         }
-        results.put(change, Optional.fromNullable(errorMessage));
+        results.put(change, Optional.ofNullable(errorMessage));
       }
 
       synchronized (parent.reconfigLock) {
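
The change above migrates from Guava's Optional to java.util.Optional; a minimal illustrative sketch of the replacement call (not part of the commit):

    import java.util.Optional;

    class OptionalMigrationSketch {
      // Guava's Optional.fromNullable(x) becomes java.util.Optional.ofNullable(x).
      static Optional<String> wrap(String errorMessage) {
        return Optional.ofNullable(errorMessage);   // empty Optional when errorMessage is null
      }
    }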

+ 3 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java

@@ -18,15 +18,15 @@
 
 package org.apache.hadoop.conf;
 
-import com.google.common.base.Optional;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.ReconfigurationUtil.PropertyChange;
 
 import java.util.Map;
+import java.util.Optional;
 
-@InterfaceAudience.Public
-@InterfaceStability.Stable
+@InterfaceAudience.LimitedPrivate({"HDFS", "Management Tools"})
+@InterfaceStability.Unstable
 public class ReconfigurationTaskStatus {
   long startTime;
   long endTime;

+ 21 - 12
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java

@@ -742,18 +742,27 @@ public class NetUtils {
               + ";"
               + see("BindException"));
     } else if (exception instanceof ConnectException) {
-      // connection refused; include the host:port in the error
-      return wrapWithMessage(exception, 
-          "Call From "
-              + localHost
-              + " to "
-              + destHost
-              + ":"
-              + destPort
-              + " failed on connection exception: "
-              + exception
-              + ";"
-              + see("ConnectionRefused"));
+      // Check if client was trying to connect to an unspecified IPv4 address
+      // (0.0.0.0) or IPv6 address(0:0:0:0:0:0:0:0 or ::)
+      if ((destHost != null && (destHost.equals("0.0.0.0") ||
+          destHost.equals("0:0:0:0:0:0:0:0") || destHost.equals("::")))
+          || destPort == 0) {
+        return wrapWithMessage(exception, "Your endpoint configuration" +
+            " is wrong;" + see("UnsetHostnameOrPort"));
+      } else {
+        // connection refused; include the host:port in the error
+        return wrapWithMessage(exception,
+            "Call From "
+                + localHost
+                + " to "
+                + destHost
+                + ":"
+                + destPort
+                + " failed on connection exception: "
+                + exception
+                + ";"
+                + see("ConnectionRefused"));
+      }
     } else if (exception instanceof UnknownHostException) {
       return wrapWithMessage(exception,
           "Invalid host name: "

+ 42 - 13
hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/CHANGES.2.8.2.md

@@ -16,9 +16,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 -->
-# Apache Hadoop Changelog
+# "Apache Hadoop" Changelog
 
-## Release 2.8.2 - Unreleased (as of 2017-08-28)
+## Release 2.8.2 - 2017-10-24
 
 ### INCOMPATIBLE CHANGES:
 
@@ -27,6 +27,18 @@
 | [HADOOP-14174](https://issues.apache.org/jira/browse/HADOOP-14174) | Set default ADLS access token provider type to ClientCredential |  Major | fs/adl | John Zhuge | John Zhuge |
 
 
+### IMPORTANT ISSUES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+
+
+### NEW FEATURES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+
+
 ### IMPROVEMENTS:
 
 | JIRA | Summary | Priority | Component | Reporter | Contributor |
@@ -67,15 +79,17 @@
 | [HDFS-11881](https://issues.apache.org/jira/browse/HDFS-11881) | NameNode consumes a lot of memory for snapshot diff report generation |  Major | hdfs, snapshots | Manoj Govindassamy | Manoj Govindassamy |
 | [HDFS-12042](https://issues.apache.org/jira/browse/HDFS-12042) | Lazy initialize AbstractINodeDiffList#diffs for snapshots to reduce memory consumption |  Major | . | Misha Dmitriev | Misha Dmitriev |
 | [HDFS-12078](https://issues.apache.org/jira/browse/HDFS-12078) | Add time unit to the description of property dfs.namenode.stale.datanode.interval in hdfs-default.xml |  Minor | documentation, hdfs | Weiwei Yang | Weiwei Yang |
-| [YARN-6764](https://issues.apache.org/jira/browse/YARN-6764) | Simplify the logic in FairScheduler#attemptScheduling |  Trivial | fairscheduler | Yufei Gu | Yufei Gu |
 | [HADOOP-14629](https://issues.apache.org/jira/browse/HADOOP-14629) | Improve exception checking in FileContext related JUnit tests |  Major | fs, test | Andras Bokor | Andras Bokor |
 | [HDFS-12137](https://issues.apache.org/jira/browse/HDFS-12137) | DN dataset lock should be fair |  Critical | datanode | Daryn Sharp | Daryn Sharp |
-| [HADOOP-14521](https://issues.apache.org/jira/browse/HADOOP-14521) | KMS client needs retry logic |  Major | . | Rushabh S Shah | Rushabh S Shah |
 | [HADOOP-14659](https://issues.apache.org/jira/browse/HADOOP-14659) | UGI getShortUserName does not need to search the Subject |  Major | common | Daryn Sharp | Daryn Sharp |
 | [YARN-6768](https://issues.apache.org/jira/browse/YARN-6768) | Improve performance of yarn api record toString and fromString |  Major | . | Jonathan Eagles | Jonathan Eagles |
 | [YARN-5892](https://issues.apache.org/jira/browse/YARN-5892) | Support user-specific minimum user limit percentage in Capacity Scheduler |  Major | capacityscheduler | Eric Payne | Eric Payne |
 | [YARN-6917](https://issues.apache.org/jira/browse/YARN-6917) | Queue path is recomputed from scratch on every allocation |  Minor | capacityscheduler | Jason Lowe | Eric Payne |
 | [HDFS-12301](https://issues.apache.org/jira/browse/HDFS-12301) | NN File Browser UI: Navigate to a path when enter is pressed |  Trivial | ui | Ravi Prakash | Ravi Prakash |
+| [HADOOP-14251](https://issues.apache.org/jira/browse/HADOOP-14251) | Credential provider should handle property key deprecation |  Critical | security | John Zhuge | John Zhuge |
+| [MAPREDUCE-6937](https://issues.apache.org/jira/browse/MAPREDUCE-6937) | Backport MAPREDUCE-6870 to branch-2 while preserving compatibility |  Major | . | Zhe Zhang | Peter Bacsko |
+| [YARN-5547](https://issues.apache.org/jira/browse/YARN-5547) | NMLeveldbStateStore should be more tolerant of unknown keys |  Major | nodemanager | Jason Lowe | Ajith S |
+| [YARN-6930](https://issues.apache.org/jira/browse/YARN-6930) | Admins should be able to explicitly enable specific LinuxContainerRuntime in the NodeManager |  Major | nodemanager | Vinod Kumar Vavilapalli | Shane Kumpf |
 
 
 ### BUG FIXES:
@@ -109,7 +123,7 @@
 | [YARN-6137](https://issues.apache.org/jira/browse/YARN-6137) | Yarn client implicitly invoke ATS client which accesses HDFS |  Major | . | Yesha Vora | Li Lu |
 | [HADOOP-13433](https://issues.apache.org/jira/browse/HADOOP-13433) | Race in UGI.reloginFromKeytab |  Major | security | Duo Zhang | Duo Zhang |
 | [HADOOP-13119](https://issues.apache.org/jira/browse/HADOOP-13119) | Add ability to secure log servlet using proxy users |  Major | . | Jeffrey E  Rodriguez | Yuanbo Liu |
-| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks |  Major | fs/s3, test | Akira Ajisaka | Yiqun Lin |
+| [HADOOP-14058](https://issues.apache.org/jira/browse/HADOOP-14058) | Fix NativeS3FileSystemContractBaseTest#testDirWithDifferentMarkersWorks |  Minor | fs/s3, test | Akira Ajisaka | Yiqun Lin |
 | [HDFS-11084](https://issues.apache.org/jira/browse/HDFS-11084) | Add a regression test for sticky bit support of OIV ReverseXML processor |  Major | tools | Wei-Chiu Chuang | Wei-Chiu Chuang |
 | [HDFS-11391](https://issues.apache.org/jira/browse/HDFS-11391) | Numeric usernames do no work with WebHDFS FS (write access) |  Major | webhdfs | Pierre Villard | Pierre Villard |
 | [HDFS-11177](https://issues.apache.org/jira/browse/HDFS-11177) | 'storagepolicies -getStoragePolicy' command should accept URI based path. |  Major | shell | Surendra Singh Lilhore | Surendra Singh Lilhore |
@@ -126,7 +140,7 @@
 | [YARN-6321](https://issues.apache.org/jira/browse/YARN-6321) | TestResources test timeouts are too aggressive |  Major | test | Jason Lowe | Eric Badger |
 | [HDFS-11512](https://issues.apache.org/jira/browse/HDFS-11512) | Increase timeout on TestShortCircuitLocalRead#testSkipWithVerifyChecksum |  Minor | . | Eric Badger | Eric Badger |
 | [HDFS-11499](https://issues.apache.org/jira/browse/HDFS-11499) | Decommissioning stuck because of failing recovery |  Major | hdfs, namenode | Lukas Majercak | Lukas Majercak |
-| [HDFS-11395](https://issues.apache.org/jira/browse/HDFS-11395) | RequestHedgingProxyProvider#RequestHedgingInvocationHandler hides the Exception thrown from NameNode |  Major | ha | Nandakumar | Nandakumar |
+| [HDFS-11395](https://issues.apache.org/jira/browse/HDFS-11395) | RequestHedgingProxyProvider#RequestHedgingInvocationHandler hides the Exception thrown from NameNode |  Major | ha | Nanda kumar | Nanda kumar |
 | [YARN-4051](https://issues.apache.org/jira/browse/YARN-4051) | ContainerKillEvent lost when container is still recovering and application finishes |  Critical | nodemanager | sandflee | sandflee |
 | [YARN-6217](https://issues.apache.org/jira/browse/YARN-6217) | TestLocalCacheDirectoryManager test timeout is too aggressive |  Major | test | Jason Lowe | Miklos Szegedi |
 | [HDFS-11132](https://issues.apache.org/jira/browse/HDFS-11132) | Allow AccessControlException in contract tests when getFileStatus on subdirectory of existing files |  Major | fs/adl, test | Vishwajeet Dusane | Vishwajeet Dusane |
@@ -146,11 +160,10 @@
 | [HADOOP-14247](https://issues.apache.org/jira/browse/HADOOP-14247) | FileContextMainOperationsBaseTest should clean up test root path |  Minor | fs, test | Mingliang Liu | Mingliang Liu |
 | [YARN-6352](https://issues.apache.org/jira/browse/YARN-6352) | Header injections are possible in application proxy servlet |  Major | resourcemanager, security | Naganarasimha G R | Naganarasimha G R |
 | [MAPREDUCE-6873](https://issues.apache.org/jira/browse/MAPREDUCE-6873) | MR Job Submission Fails if MR framework application path not on defaultFS |  Minor | mrv2 | Erik Krogen | Erik Krogen |
-| [HADOOP-14256](https://issues.apache.org/jira/browse/HADOOP-14256) | [S3A DOC] Correct the format for "Seoul" example |  Minor | documentation, s3 | Brahma Reddy Battula | Brahma Reddy Battula |
+| [HADOOP-14256](https://issues.apache.org/jira/browse/HADOOP-14256) | [S3A DOC] Correct the format for "Seoul" example |  Minor | documentation, fs/s3 | Brahma Reddy Battula | Brahma Reddy Battula |
 | [MAPREDUCE-6850](https://issues.apache.org/jira/browse/MAPREDUCE-6850) | Shuffle Handler keep-alive connections are closed from the server side |  Major | . | Jonathan Eagles | Jonathan Eagles |
 | [HDFS-11592](https://issues.apache.org/jira/browse/HDFS-11592) | Closing a file has a wasteful preconditions in NameNode |  Major | namenode | Eric Badger | Eric Badger |
 | [YARN-6354](https://issues.apache.org/jira/browse/YARN-6354) | LeveldbRMStateStore can parse invalid keys when recovering reservations |  Major | resourcemanager | Jason Lowe | Jason Lowe |
-| [YARN-5703](https://issues.apache.org/jira/browse/YARN-5703) | ReservationAgents are not correctly configured |  Major | capacity scheduler, resourcemanager | Sean Po | Manikandan R |
 | [HADOOP-14268](https://issues.apache.org/jira/browse/HADOOP-14268) | Fix markdown itemization in hadoop-aws documents |  Minor | documentation, fs/s3 | Akira Ajisaka | Akira Ajisaka |
 | [YARN-6436](https://issues.apache.org/jira/browse/YARN-6436) | TestSchedulingPolicy#testParseSchedulingPolicy timeout is too low |  Major | test | Jason Lowe | Eric Badger |
 | [YARN-6420](https://issues.apache.org/jira/browse/YARN-6420) | RM startup failure due to wrong order in nodelabel editlog |  Critical | . | Bibin A Chundatt | Bibin A Chundatt |
@@ -164,7 +177,6 @@
 | [HADOOP-14066](https://issues.apache.org/jira/browse/HADOOP-14066) | VersionInfo should be marked as public API |  Critical | common | Thejas M Nair | Akira Ajisaka |
 | [HADOOP-14293](https://issues.apache.org/jira/browse/HADOOP-14293) | Initialize FakeTimer with a less trivial value |  Major | test | Andrew Wang | Andrew Wang |
 | [YARN-6461](https://issues.apache.org/jira/browse/YARN-6461) | TestRMAdminCLI has very low test timeouts |  Major | test | Jason Lowe | Eric Badger |
-| [YARN-6463](https://issues.apache.org/jira/browse/YARN-6463) | correct spelling mistake in FileSystemRMStateStore |  Trivial | . | Yeliang Cang | Yeliang Cang |
 | [HDFS-11163](https://issues.apache.org/jira/browse/HDFS-11163) | Mover should move the file blocks to default storage once policy is unset |  Major | balancer & mover | Surendra Singh Lilhore | Surendra Singh Lilhore |
 | [YARN-6450](https://issues.apache.org/jira/browse/YARN-6450) | TestContainerManagerWithLCE requires override for each new test added to ContainerManagerTest |  Major | test | Jason Lowe | Jason Lowe |
 | [YARN-3760](https://issues.apache.org/jira/browse/YARN-3760) | FSDataOutputStream leak in AggregatedLogFormat.LogWriter.close() |  Critical | nodemanager | Daryn Sharp | Haibo Chen |
@@ -219,7 +231,7 @@
 | [HADOOP-14166](https://issues.apache.org/jira/browse/HADOOP-14166) | Reset the DecayRpcScheduler AvgResponseTime metric to zero when queue is not used |  Major | common | Surendra Singh Lilhore | Surendra Singh Lilhore |
 | [HDFS-11661](https://issues.apache.org/jira/browse/HDFS-11661) | GetContentSummary uses excessive amounts of memory |  Blocker | namenode | Nathan Roberts | Wei-Chiu Chuang |
 | [YARN-6141](https://issues.apache.org/jira/browse/YARN-6141) | ppc64le on Linux doesn't trigger \_\_linux get\_executable codepath |  Major | nodemanager | Sonia Garudi | Ayappan |
-| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health stauts as corrupt even one replica is corrupt |  Critical | . | Brahma Reddy Battula | Brahma Reddy Battula |
+| [HDFS-11445](https://issues.apache.org/jira/browse/HDFS-11445) | FSCK shows overall health status as corrupt even one replica is corrupt |  Critical | . | Brahma Reddy Battula | Brahma Reddy Battula |
 | [YARN-6643](https://issues.apache.org/jira/browse/YARN-6643) | TestRMFailover fails rarely due to port conflict |  Major | test | Robert Kanter | Robert Kanter |
 | [HDFS-11817](https://issues.apache.org/jira/browse/HDFS-11817) | A faulty node can cause a lease leak and NPE on accessing data |  Critical | . | Kihwal Lee | Kihwal Lee |
 | [YARN-6641](https://issues.apache.org/jira/browse/YARN-6641) | Non-public resource localization on a bad disk causes subsequent containers failure |  Major | . | Kuhu Shukla | Kuhu Shukla |
@@ -242,16 +254,14 @@
 | [HADOOP-14512](https://issues.apache.org/jira/browse/HADOOP-14512) | WASB atomic rename should not throw exception if the file is neither in src nor in dst when doing the rename |  Major | fs/azure | Duo Xu | Duo Xu |
 | [YARN-6585](https://issues.apache.org/jira/browse/YARN-6585) | RM fails to start when upgrading from 2.7 to 2.8 for clusters with node labels. |  Blocker | . | Eric Payne | Sunil G |
 | [HDFS-11967](https://issues.apache.org/jira/browse/HDFS-11967) | TestJMXGet fails occasionally |  Major | . | Arpit Agarwal | Arpit Agarwal |
-| [HDFS-11947](https://issues.apache.org/jira/browse/HDFS-11947) | When constructing a thread name, BPOfferService may print a bogus warning message |  Minor | datanode | Tsz Wo Nicholas Sze | Weiwei Yang |
 | [YARN-6719](https://issues.apache.org/jira/browse/YARN-6719) | Fix findbugs warnings in SLSCapacityScheduler.java |  Major | . | Akira Ajisaka | Akira Ajisaka |
 | [HADOOP-14540](https://issues.apache.org/jira/browse/HADOOP-14540) | Replace MRv1 specific terms in HostsFileReader |  Minor | documentation | Akira Ajisaka | hu xiaodong |
-| [HDFS-11995](https://issues.apache.org/jira/browse/HDFS-11995) | HDFS Architecture documentation incorrectly describes writing to a local temporary file. |  Minor | documentation | Chris Nauroth | Nandakumar |
+| [HDFS-11995](https://issues.apache.org/jira/browse/HDFS-11995) | HDFS Architecture documentation incorrectly describes writing to a local temporary file. |  Minor | documentation | Chris Nauroth | Nanda kumar |
 | [HDFS-11736](https://issues.apache.org/jira/browse/HDFS-11736) | OIV tests should not write outside 'target' directory. |  Major | . | Konstantin Shvachko | Yiqun Lin |
 | [YARN-6713](https://issues.apache.org/jira/browse/YARN-6713) | Fix dead link in the Javadoc of FairSchedulerEventLog.java |  Minor | documentation | Akira Ajisaka | Weiwei Yang |
 | [HADOOP-14533](https://issues.apache.org/jira/browse/HADOOP-14533) | Size of args cannot be less than zero in TraceAdmin#run as its linkedlist |  Trivial | common, tracing | Weisen Han | Weisen Han |
 | [HDFS-11960](https://issues.apache.org/jira/browse/HDFS-11960) | Successfully closed files can stay under-replicated. |  Critical | . | Kihwal Lee | Kihwal Lee |
 | [HADOOP-14146](https://issues.apache.org/jira/browse/HADOOP-14146) | KerberosAuthenticationHandler should authenticate with SPN in AP-REQ |  Major | security | Daryn Sharp | Daryn Sharp |
-| [YARN-5876](https://issues.apache.org/jira/browse/YARN-5876) | TestResourceTrackerService#testGracefulDecommissionWithApp fails intermittently on trunk |  Major | . | Varun Saxena | Robert Kanter |
 | [YARN-6467](https://issues.apache.org/jira/browse/YARN-6467) | CSQueueMetrics needs to update the current metrics for default partition only |  Major | capacity scheduler | Naganarasimha G R | Manikandan R |
 | [HADOOP-14024](https://issues.apache.org/jira/browse/HADOOP-14024) | KMS JMX endpoint throws ClassNotFoundException |  Critical | kms | Andrew Wang | John Zhuge |
 | [YARN-6749](https://issues.apache.org/jira/browse/YARN-6749) | TestAppSchedulingInfo.testPriorityAccounting fails consistently |  Major | . | Eric Badger | Naganarasimha G R |
@@ -265,6 +275,7 @@
 | [MAPREDUCE-6246](https://issues.apache.org/jira/browse/MAPREDUCE-6246) | DBOutputFormat.java appending extra semicolon to query which is incompatible with DB2 |  Major | mrv1, mrv2 | ramtin | Gergely Novák |
 | [YARN-6428](https://issues.apache.org/jira/browse/YARN-6428) | Queue AM limit is not honored  in CS always |  Major | . | Bibin A Chundatt | Bibin A Chundatt |
 | [YARN-6770](https://issues.apache.org/jira/browse/YARN-6770) | [Docs] A small mistake in the example of TimelineClient |  Trivial | docs | Jinjiang Ling | Jinjiang Ling |
+| [HADOOP-10829](https://issues.apache.org/jira/browse/HADOOP-10829) | Iteration on CredentialProviderFactory.serviceLoader  is thread-unsafe |  Major | security | Benoy Antony | Benoy Antony |
 | [YARN-6809](https://issues.apache.org/jira/browse/YARN-6809) | Fix typo in ResourceManagerHA.md |  Trivial | documentation | Akira Ajisaka | Yeliang Cang |
 | [MAPREDUCE-5621](https://issues.apache.org/jira/browse/MAPREDUCE-5621) | mr-jobhistory-daemon.sh doesn't have to execute mkdir and chown all the time |  Minor | jobhistoryserver | Shinichi Yamashita | Shinichi Yamashita |
 | [YARN-6797](https://issues.apache.org/jira/browse/YARN-6797) | TimelineWriter does not fully consume the POST response |  Major | timelineclient | Jason Lowe | Jason Lowe |
@@ -306,6 +317,21 @@
 | [YARN-7087](https://issues.apache.org/jira/browse/YARN-7087) | NM failed to perform log aggregation due to absent container |  Blocker | log-aggregation | Jason Lowe | Jason Lowe |
 | [YARN-7051](https://issues.apache.org/jira/browse/YARN-7051) | Avoid concurrent modification exception in FifoIntraQueuePreemptionPlugin |  Critical | capacity scheduler, scheduler preemption, yarn | Eric Payne | Eric Payne |
 | [HDFS-12364](https://issues.apache.org/jira/browse/HDFS-12364) | [branch-2.8.2] Fix the Compile Error after HDFS-12299 |  Blocker | hdfs | Jiandan Yang | Jiandan Yang |
+| [YARN-7112](https://issues.apache.org/jira/browse/YARN-7112) | TestAMRMProxy is failing with invalid request |  Major | . | Jason Lowe | Jason Lowe |
+| [YARN-7076](https://issues.apache.org/jira/browse/YARN-7076) | yarn application -list -appTypes \<appType\> is not working |  Blocker | . | Jian He | Jian He |
+| [MAPREDUCE-6641](https://issues.apache.org/jira/browse/MAPREDUCE-6641) | TestTaskAttempt fails in trunk |  Major | test | Tsuyoshi Ozawa | Haibo Chen |
+| [YARN-7083](https://issues.apache.org/jira/browse/YARN-7083) | Log aggregation deletes/renames while file is open |  Critical | nodemanager | Daryn Sharp | Jason Lowe |
+| [HADOOP-14814](https://issues.apache.org/jira/browse/HADOOP-14814) | Fix incompatible API change on FsServerDefaults to HADOOP-14104 |  Blocker | . | Junping Du | Junping Du |
+| [YARN-7023](https://issues.apache.org/jira/browse/YARN-7023) | Incorrect ReservationId.compareTo() implementation |  Minor | reservation system | Oleg Danilov | Oleg Danilov |
+| [HADOOP-14842](https://issues.apache.org/jira/browse/HADOOP-14842) | Hadoop 2.8.2 release build process get stuck due to java issue |  Blocker | build | Junping Du | Junping Du |
+| [YARN-5195](https://issues.apache.org/jira/browse/YARN-5195) | RM intermittently crashed with NPE while handling APP\_ATTEMPT\_REMOVED event when async-scheduling enabled in CapacityScheduler |  Major | resourcemanager | Karam Singh | sandflee |
+| [YARN-7034](https://issues.apache.org/jira/browse/YARN-7034) | DefaultLinuxContainerRuntime and DockerLinuxContainerRuntime sends client environment variables to container-executor |  Blocker | nodemanager | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7249](https://issues.apache.org/jira/browse/YARN-7249) | Fix CapacityScheduler NPE issue when a container preempted while the node is being removed |  Blocker | . | Wangda Tan | Wangda Tan |
+| [YARN-7325](https://issues.apache.org/jira/browse/YARN-7325) | Remove unused container variable in DockerLinuxContainerRuntime |  Minor | nodemanager | Shane Kumpf | Shane Kumpf |
+| [YARN-7246](https://issues.apache.org/jira/browse/YARN-7246) | Fix the default docker binary path |  Blocker | nodemanager | Shane Kumpf | Shane Kumpf |
+| [YARN-7333](https://issues.apache.org/jira/browse/YARN-7333) | container-executor fails to remove entries from a directory that is not writable or executable |  Critical | . | Jason Lowe | Jason Lowe |
+| [YARN-7230](https://issues.apache.org/jira/browse/YARN-7230) | Document DockerContainerRuntime for branch-2.8 with proper scope and claim as an experimental feature |  Blocker | documentation | Junping Du | Shane Kumpf |
+| [HADOOP-14958](https://issues.apache.org/jira/browse/HADOOP-14958) | CLONE - Fix source-level compatibility after HADOOP-11252 |  Blocker | . | Junping Du | Junping Du |
 
 
 ### TESTS:
@@ -346,6 +372,7 @@
 | [YARN-2113](https://issues.apache.org/jira/browse/YARN-2113) | Add cross-user preemption within CapacityScheduler's leaf-queue |  Major | capacity scheduler | Vinod Kumar Vavilapalli | Sunil G |
 | [YARN-6775](https://issues.apache.org/jira/browse/YARN-6775) | CapacityScheduler: Improvements to assignContainers, avoid unnecessary canAssignToUser/Queue calls |  Major | capacityscheduler | Nathan Roberts | Nathan Roberts |
 | [YARN-6988](https://issues.apache.org/jira/browse/YARN-6988) | container-executor fails for docker when command length \> 4096 B |  Major | yarn | Eric Badger | Eric Badger |
+| [HDFS-12473](https://issues.apache.org/jira/browse/HDFS-12473) | Change hosts JSON file format |  Major | . | Ming Ma | Ming Ma |
 
 
 ### OTHER:
@@ -354,3 +381,5 @@
 |:---- |:---- | :--- |:---- |:---- |:---- |
 | [HADOOP-14344](https://issues.apache.org/jira/browse/HADOOP-14344) | Revert HADOOP-13606 swift FS to add a service load metadata file |  Major | . | John Zhuge | John Zhuge |
 | [HDFS-11717](https://issues.apache.org/jira/browse/HDFS-11717) | Add unit test for HDFS-11709 StandbyCheckpointer should handle non-existing legacyOivImageDir gracefully |  Minor | ha, namenode | Erik Krogen | Erik Krogen |
+
+

+ 4 - 1
hadoop-common-project/hadoop-common/src/site/markdown/release/2.8.2/RELEASENOTES.2.8.2.md

@@ -16,7 +16,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 -->
-# Apache Hadoop  2.8.2 Release Notes
+# "Apache Hadoop"  2.8.2 Release Notes
 
 These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
 
@@ -73,3 +73,6 @@ Reverted HDFS-10797 to fix a scalability regression brought by the commit.
 * [HDFS-10326](https://issues.apache.org/jira/browse/HDFS-10326) | *Major* | **Disable setting tcp socket send/receive buffers for write pipelines**
 
 The size of the TCP socket buffers are no longer hardcoded by default. Instead the OS now will automatically tune the size for the buffer.
+
+
+

+ 1 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestReconfiguration.java

@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.conf;
 
-import com.google.common.base.Optional;
 import com.google.common.base.Supplier;
 import com.google.common.collect.Lists;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -44,6 +43,7 @@ import java.util.Collection;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeoutException;
 

+ 1 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestKDiag.java

@@ -67,7 +67,7 @@ public class TestKDiag extends Assert {
 
   @BeforeClass
   public static void startMiniKdc() throws Exception {
-    workDir = new File(System.getProperty("test.dir", "target"));
+    workDir = GenericTestUtils.getTestDir(TestKDiag.class.getSimpleName());
     securityProperties = MiniKdc.createConf();
     kdc = new MiniKdc(securityProperties, workDir);
     kdc.start();

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolUtils.java

@@ -19,13 +19,13 @@
 package org.apache.hadoop.hdfs.protocolPB;
 
 import java.util.Map;
+import java.util.Optional;
 
 import org.apache.hadoop.conf.ReconfigurationTaskStatus;
 import org.apache.hadoop.conf.ReconfigurationUtil.PropertyChange;
 import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.GetReconfigurationStatusConfigChangeProto;
 import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.GetReconfigurationStatusResponseProto;
 
-import com.google.common.base.Optional;
 import com.google.common.collect.Maps;
 
 /**
@@ -56,7 +56,7 @@ public final class ReconfigurationProtocolUtils {
         if (change.hasErrorMessage()) {
           errorMessage = change.getErrorMessage();
         }
-        statusMap.put(pc, Optional.fromNullable(errorMessage));
+        statusMap.put(pc, Optional.ofNullable(errorMessage));
       }
     }
     return new ReconfigurationTaskStatus(startTime, endTime, statusMap);

+ 36 - 25
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java

@@ -23,7 +23,6 @@ import java.net.HttpURLConnection;
 import java.net.URL;
 import java.net.URLConnection;
 import java.security.GeneralSecurityException;
-import java.util.concurrent.TimeUnit;
 
 import javax.net.ssl.HostnameVerifier;
 import javax.net.ssl.HttpsURLConnection;
@@ -32,7 +31,6 @@ import javax.net.ssl.SSLSocketFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.web.oauth2.OAuth2ConnectionConfigurator;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authentication.client.AuthenticatedURL;
@@ -84,22 +82,48 @@ public class URLConnectionFactory {
    */
   public static URLConnectionFactory newDefaultURLConnectionFactory(
       Configuration conf) {
-    ConnectionConfigurator conn = getSSLConnectionConfiguration(conf);
+    ConnectionConfigurator conn = getSSLConnectionConfiguration(
+        DEFAULT_SOCKET_TIMEOUT, DEFAULT_SOCKET_TIMEOUT, conf);
 
     return new URLConnectionFactory(conn);
   }
 
+  /**
+   * Construct a new URLConnectionFactory based on the configuration. It will
+   * honor connectTimeout and readTimeout when they are specified.
+   */
+  public static URLConnectionFactory newDefaultURLConnectionFactory(
+      int connectTimeout, int readTimeout, Configuration conf) {
+    ConnectionConfigurator conn = getSSLConnectionConfiguration(
+        connectTimeout, readTimeout, conf);
+    return new URLConnectionFactory(conn);
+  }
+
   private static ConnectionConfigurator getSSLConnectionConfiguration(
-      Configuration conf) {
+      final int connectTimeout, final int readTimeout, Configuration conf) {
     ConnectionConfigurator conn;
     try {
-      conn = newSslConnConfigurator(DEFAULT_SOCKET_TIMEOUT, conf);
+      conn = newSslConnConfigurator(connectTimeout, readTimeout, conf);
     } catch (Exception e) {
       LOG.warn(
           "Cannot load customized ssl related configuration. Fallback to" +
               " system-generic settings.",
           e);
-      conn = DEFAULT_TIMEOUT_CONN_CONFIGURATOR;
+      if (connectTimeout == DEFAULT_SOCKET_TIMEOUT &&
+          readTimeout == DEFAULT_SOCKET_TIMEOUT) {
+        conn = DEFAULT_TIMEOUT_CONN_CONFIGURATOR;
+      } else {
+        conn = new ConnectionConfigurator() {
+          @Override
+          public HttpURLConnection configure(HttpURLConnection connection)
+              throws IOException {
+            URLConnectionFactory.setTimeouts(connection,
+                connectTimeout,
+                readTimeout);
+            return connection;
+          }
+        };
+      }
     }
 
     return conn;
@@ -110,11 +134,12 @@ public class URLConnectionFactory {
    * It will also try to load the SSL configuration when they are specified.
    */
   public static URLConnectionFactory newOAuth2URLConnectionFactory(
-      Configuration conf) throws IOException {
+      int connectTimeout, int readTimeout, Configuration conf)
+      throws IOException {
     ConnectionConfigurator conn;
     try {
       ConnectionConfigurator sslConnConfigurator
-          = newSslConnConfigurator(DEFAULT_SOCKET_TIMEOUT, conf);
+          = newSslConnConfigurator(connectTimeout, readTimeout, conf);
 
       conn = new OAuth2ConnectionConfigurator(conf, sslConnConfigurator);
     } catch (Exception e) {
@@ -128,33 +153,18 @@ public class URLConnectionFactory {
     this.connConfigurator = connConfigurator;
   }
 
-  /**
-   * Create a new ConnectionConfigurator for SSL connections
-   */
   private static ConnectionConfigurator newSslConnConfigurator(
-      final int defaultTimeout, Configuration conf)
+      final int connectTimeout, final int readTimeout, Configuration conf)
       throws IOException, GeneralSecurityException {
     final SSLFactory factory;
     final SSLSocketFactory sf;
     final HostnameVerifier hv;
-    final int connectTimeout;
-    final int readTimeout;
 
     factory = new SSLFactory(SSLFactory.Mode.CLIENT, conf);
     factory.init();
     sf = factory.createSSLSocketFactory();
     hv = factory.getHostnameVerifier();
 
-    connectTimeout = (int) conf.getTimeDuration(
-        HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY,
-        defaultTimeout,
-        TimeUnit.MILLISECONDS);
-
-    readTimeout = (int) conf.getTimeDuration(
-        HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY,
-        defaultTimeout,
-        TimeUnit.MILLISECONDS);
-
     return new ConnectionConfigurator() {
       @Override
       public HttpURLConnection configure(HttpURLConnection conn)
@@ -222,7 +232,8 @@ public class URLConnectionFactory {
    *
    * @param connection
    *          URLConnection to set
-   * @param socketTimeout
+   * @param connectTimeout
+   * @param readTimeout
    *          the connection and read timeout of the connection.
    */
   private static void setTimeouts(URLConnection connection,
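
With this change, callers supply the connect and read timeouts explicitly instead of the factory reading HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY / DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY itself (that lookup moves to WebHdfsFileSystem, next diff). A hedged usage sketch with illustrative values:

    Configuration conf = new Configuration();
    int connectTimeoutMs = 60_000;   // illustrative; WebHdfsFileSystem derives this from configuration
    int readTimeoutMs = 60_000;
    URLConnectionFactory factory =
        URLConnectionFactory.newDefaultURLConnectionFactory(connectTimeoutMs, readTimeoutMs, conf);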

+ 14 - 2
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -46,6 +46,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.StringTokenizer;
+import java.util.concurrent.TimeUnit;
 
 import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.MediaType;
@@ -193,6 +194,17 @@ public class WebHdfsFileSystem extends FileSystem
         HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_KEY,
         HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_DEFAULT));
 
+    int connectTimeout = (int) conf.getTimeDuration(
+        HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY,
+        URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
+        TimeUnit.MILLISECONDS);
+
+    int readTimeout = (int) conf.getTimeDuration(
+        HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY,
+        URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
+        TimeUnit.MILLISECONDS);
+
+
     boolean isOAuth = conf.getBoolean(
         HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_KEY,
         HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_DEFAULT);
@@ -200,11 +212,11 @@ public class WebHdfsFileSystem extends FileSystem
     if(isOAuth) {
       LOG.debug("Enabling OAuth2 in WebHDFS");
       connectionFactory = URLConnectionFactory
-          .newOAuth2URLConnectionFactory(conf);
+          .newOAuth2URLConnectionFactory(connectTimeout, readTimeout, conf);
     } else {
       LOG.debug("Not enabling OAuth2 in WebHDFS");
       connectionFactory = URLConnectionFactory
-          .newDefaultURLConnectionFactory(conf);
+          .newDefaultURLConnectionFactory(connectTimeout, readTimeout, conf);
     }
 
 

The file diff has been suppressed because it is too large
+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_2.8.2.xml


+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -374,6 +374,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE = "dfs.namenode.snapshot.skip.capture.accesstime-only-change";
   public static final boolean DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT = false;
 
+  public static final String
+      DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT =
+      "dfs.namenode.snapshotdiff.allow.snap-root-descendant";
+  public static final boolean
+      DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT_DEFAULT =
+      true;
+
   // Whether to enable datanode's stale state detection and usage for reads
   public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
   public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
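
A short sketch of how the new flag could be consulted (assumes a Configuration instance; in this commit the check presumably lives in SnapshotManager):

    // Sketch only: read the flag that allows scoping a snapshot diff to a
    // descendant of the snapshot root.
    boolean allowDescendantDiff = conf.getBoolean(
        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT_DEFAULT);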

+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ReconfigurationProtocolServerSideUtils.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.protocolPB;
 
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 
 import org.apache.hadoop.conf.ReconfigurationTaskStatus;
 import org.apache.hadoop.conf.ReconfigurationUtil.PropertyChange;
@@ -26,8 +27,6 @@ import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.GetRe
 import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.GetReconfigurationStatusResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.ReconfigurationProtocolProtos.ListReconfigurablePropertiesResponseProto;
 
-import com.google.common.base.Optional;
-
 /**
  * This is a server side utility class that handles
  * common logic to parameter reconfiguration.

+ 14 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java

@@ -309,14 +309,22 @@ public class JournalNodeSyncer {
         boolean success = false;
         try {
           if (remoteJNproxy.httpServerUrl == null) {
-            remoteJNproxy.httpServerUrl = getHttpServerURI("http",
-                remoteJNproxy.jnAddr.getHostName(), response.getHttpPort());
+            if (response.hasFromURL()) {
+              remoteJNproxy.httpServerUrl = getHttpServerURI(
+                  response.getFromURL(), remoteJNproxy.jnAddr.getHostName());
+            } else {
+              LOG.error("EditLogManifest response does not have fromUrl " +
+                  "field set. Aborting current sync attempt");
+              break;
+            }
           }
 
           String urlPath = GetJournalEditServlet.buildPath(jid, missingLog
               .getStartTxId(), nsInfo, false);
           url = new URL(remoteJNproxy.httpServerUrl, urlPath);
           success = downloadMissingLogSegment(url, missingLog);
+        } catch (URISyntaxException e) {
+          LOG.error("EditLogManifest's fromUrl field syntax incorrect", e);
         } catch (MalformedURLException e) {
           LOG.error("MalformedURL when download missing log segment", e);
         } catch (Exception e) {
@@ -374,9 +382,10 @@ public class JournalNodeSyncer {
     return missingEditLogs;
   }
 
-  private URL getHttpServerURI(String scheme, String hostname, int port)
-    throws MalformedURLException {
-    return new URL(scheme, hostname, port, "");
+  private URL getHttpServerURI(String fromUrl, String hostAddr)
+      throws URISyntaxException, MalformedURLException {
+    URI uri = new URI(fromUrl);
+    return new URL(uri.getScheme(), hostAddr, uri.getPort(), "");
   }
 
   /**
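
The syncer now trusts the scheme and port reported in the manifest's fromURL but keeps the host address it already resolved for the remote JournalNode; an illustrative outcome (hostname, address, and port hypothetical):

    // Illustrative only: a manifest fromURL of "http://jn2.example.com:8480"
    // combined with the already-resolved JN address "10.0.0.12" yields
    // "http://10.0.0.12:8480" (new URL(...) throws MalformedURLException if invalid).
    URL httpServerUrl = new URL("http", "10.0.0.12", 8480, "");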

+ 0 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -3673,11 +3673,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       readUnlock(operationName);
     }
     logAuditEvent(true, operationName, src);
-    if (topConf.isEnabled && isAuditEnabled() && isExternalInvocation()
-        && dl != null && Server.getRemoteUser() != null) {
-      topMetrics.reportFilesInGetListing(Server.getRemoteUser().toString(),
-          dl.getPartialListing().length);
-    }
     return dl;
   }
 

+ 18 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java

@@ -253,6 +253,24 @@ public class INodeDirectory extends INodeWithAdditionalFields
     return getDirectorySnapshottableFeature() != null;
   }
 
+  /**
+   * Check if this directory is a descendant directory
+   * of a snapshot root directory.
+   * @param snapshotRootDir the snapshot root directory
+   * @return true if this directory is a descendant of snapshot root
+   */
+  public boolean isDescendantOfSnapshotRoot(INodeDirectory snapshotRootDir) {
+    Preconditions.checkArgument(snapshotRootDir.isSnapshottable());
+    INodeDirectory dir = this;
+    while(dir != null) {
+      if (dir.equals(snapshotRootDir)) {
+        return true;
+      }
+      dir = dir.getParent();
+    }
+    return false;
+  }
+
   public Snapshot getSnapshot(byte[] snapshotName) {
     return getDirectorySnapshottableFeature().getSnapshot(snapshotName);
   }

+ 25 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectorySnapshottableFeature.java

@@ -249,8 +249,12 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
 
   /**
    * Compute the difference between two snapshots (or a snapshot and the current
-   * directory) of the directory.
+   * directory) of the directory. The diff calculation can be scoped to either
+   * the snapshot root or any descendant directory under the snapshot root.
    *
+   * @param snapshotRootDir the snapshot root directory
+   * @param snapshotDiffScopeDir the descendant directory under snapshot root
+   *          to scope the diff calculation to.
    * @param from The name of the start point of the comparison. Null indicating
    *          the current tree.
    * @param to The name of the end point. Null indicating the current tree.
@@ -259,18 +263,24 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
    *           point, or if endSnapshotName is not null but cannot be identified
    *           as a previous snapshot.
    */
-  SnapshotDiffInfo computeDiff(final INodeDirectory snapshotRoot,
-      final String from, final String to) throws SnapshotException {
-    Snapshot fromSnapshot = getSnapshotByName(snapshotRoot, from);
-    Snapshot toSnapshot = getSnapshotByName(snapshotRoot, to);
+  SnapshotDiffInfo computeDiff(final INodeDirectory snapshotRootDir,
+      final INodeDirectory snapshotDiffScopeDir, final String from,
+      final String to) throws SnapshotException {
+    Preconditions.checkArgument(snapshotDiffScopeDir
+        .isDescendantOfSnapshotRoot(snapshotRootDir));
+    Snapshot fromSnapshot = getSnapshotByName(snapshotRootDir, from);
+    Snapshot toSnapshot = getSnapshotByName(snapshotRootDir, to);
     // if the start point is equal to the end point, return null
     if (from.equals(to)) {
       return null;
     }
-    SnapshotDiffInfo diffs = new SnapshotDiffInfo(snapshotRoot, fromSnapshot,
-        toSnapshot);
-    computeDiffRecursively(snapshotRoot, snapshotRoot, new ArrayList<byte[]>(),
-        diffs);
+    SnapshotDiffInfo diffs = new SnapshotDiffInfo(snapshotRootDir,
+        snapshotDiffScopeDir, fromSnapshot, toSnapshot);
+    // The snapshot diff scope dir is passed in as the snapshot dir
+    // so that the file paths in the diff report are relative to the
+    // snapshot scope dir.
+    computeDiffRecursively(snapshotDiffScopeDir, snapshotDiffScopeDir,
+        new ArrayList<>(), diffs);
     return diffs;
   }
 
@@ -300,13 +310,15 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
   /**
    * Recursively compute the difference between snapshots under a given
    * directory/file.
-   * @param snapshotRoot The directory where snapshots were taken.
+   * @param snapshotDir The directory where snapshots were taken. Can be a
+   *                    snapshot root directory or any descendant directory
+   *                    under a snapshot root directory.
    * @param node The directory/file under which the diff is computed.
    * @param parentPath Relative path (corresponding to the snapshot root) of
    *                   the node's parent.
    * @param diffReport data structure used to store the diff.
    */
-  private void computeDiffRecursively(final INodeDirectory snapshotRoot,
+  private void computeDiffRecursively(final INodeDirectory snapshotDir,
       INode node, List<byte[]> parentPath, SnapshotDiffInfo diffReport) {
     final Snapshot earlierSnapshot = diffReport.isFromEarlier() ?
         diffReport.getFrom() : diffReport.getTo();
@@ -331,7 +343,7 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
         boolean toProcess = diff.searchIndex(ListType.DELETED, name) < 0;
         if (!toProcess && child instanceof INodeReference.WithName) {
           byte[][] renameTargetPath = findRenameTargetPath(
-              snapshotRoot, (WithName) child,
+              snapshotDir, (WithName) child,
               laterSnapshot == null ? Snapshot.CURRENT_STATE_ID :
                 laterSnapshot.getId());
           if (renameTargetPath != null) {
@@ -341,7 +353,7 @@ public class DirectorySnapshottableFeature extends DirectoryWithSnapshotFeature
         }
         if (toProcess) {
           parentPath.add(name);
-          computeDiffRecursively(snapshotRoot, child, parentPath, diffReport);
+          computeDiffRecursively(snapshotDir, child, parentPath, diffReport);
           parentPath.remove(parentPath.size() - 1);
         }
       }

+ 10 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDiffInfo.java

@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
@@ -101,6 +100,10 @@ class SnapshotDiffInfo {
 
   /** The root directory of the snapshots */
   private final INodeDirectory snapshotRoot;
+  /**
+   *  The scope directory under which snapshot diff is calculated.
+   */
+  private final INodeDirectory snapshotDiffScopeDir;
   /** The starting point of the difference */
   private final Snapshot from;
   /** The end point of the difference */
@@ -123,9 +126,12 @@ class SnapshotDiffInfo {
   private final Map<Long, RenameEntry> renameMap =
       new HashMap<Long, RenameEntry>();
 
-  SnapshotDiffInfo(INodeDirectory snapshotRoot, Snapshot start, Snapshot end) {
-    Preconditions.checkArgument(snapshotRoot.isSnapshottable());
-    this.snapshotRoot = snapshotRoot;
+  SnapshotDiffInfo(INodeDirectory snapshotRootDir,
+      INodeDirectory snapshotDiffScopeDir, Snapshot start, Snapshot end) {
+    Preconditions.checkArgument(snapshotRootDir.isSnapshottable() &&
+        snapshotDiffScopeDir.isDescendantOfSnapshotRoot(snapshotRootDir));
+    this.snapshotRoot = snapshotRootDir;
+    this.snapshotDiffScopeDir = snapshotDiffScopeDir;
     this.from = start;
     this.to = end;
   }

+ 55 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java

@@ -38,6 +38,7 @@ import javax.management.ObjectName;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
@@ -83,6 +84,13 @@ public class SnapshotManager implements SnapshotStatsMXBean {
    * together with the modification in next snapshot.
    */
   private boolean skipCaptureAccessTimeOnlyChange = false;
+  /**
+   * If snapshotDiffAllowSnapRootDescendant is set to true, the snapshot diff
+   * operation can be run for any descendant directory under a snapshot root
+   * directory, and the diff calculation will be scoped to that descendant
+   * directory.
+   */
+  private final boolean snapshotDiffAllowSnapRootDescendant;
 
   private final AtomicInteger numSnapshots = new AtomicInteger();
   private static final int SNAPSHOT_ID_BIT_WIDTH = 24;
@@ -102,9 +110,15 @@ public class SnapshotManager implements SnapshotStatsMXBean {
     this.skipCaptureAccessTimeOnlyChange = conf.getBoolean(
         DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
         DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE_DEFAULT);
+    this.snapshotDiffAllowSnapRootDescendant = conf.getBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
+        DFSConfigKeys.
+            DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT_DEFAULT);
     LOG.info("Loaded config captureOpenFiles: " + captureOpenFiles
-        + "skipCaptureAccessTimeOnlyChange: " +
-        skipCaptureAccessTimeOnlyChange);
+        + ", skipCaptureAccessTimeOnlyChange: "
+        + skipCaptureAccessTimeOnlyChange
+        + ", snapshotDiffAllowSnapRootDescendant: "
+        + snapshotDiffAllowSnapRootDescendant);
   }
 
   /**
@@ -228,6 +242,30 @@ public class SnapshotManager implements SnapshotStatsMXBean {
     return dir;
   }
 
+  /**
+   * Get the snapshot root directory for the given directory. The given
+   * directory must either be a snapshot root itself or a descendant of a
+   * snapshot root directory.
+   * @param iip INodesInPath for the directory whose snapshot root is needed.
+   * @return the snapshot root INodeDirectory
+   */
+  public INodeDirectory getSnapshottableAncestorDir(final INodesInPath iip)
+      throws IOException {
+    final String path = iip.getPath();
+    final INodeDirectory dir = INodeDirectory.valueOf(iip.getLastINode(), path);
+    if (dir.isSnapshottable()) {
+      return dir;
+    } else {
+      for (INodeDirectory snapRoot : this.snapshottables.values()) {
+        if (dir.isAncestorDirectory(snapRoot)) {
+          return snapRoot;
+        }
+      }
+      throw new SnapshotException("Directory is neither snapshottable nor" +
+          " under a snap root!");
+    }
+  }
+
   /**
    * Create a snapshot of the given path.
    * It is assumed that the caller will perform synchronization.
@@ -396,22 +434,31 @@ public class SnapshotManager implements SnapshotStatsMXBean {
    * snapshot of the directory and its current tree.
    */
   public SnapshotDiffReport diff(final INodesInPath iip,
-      final String snapshotRootPath, final String from,
+      final String snapshotPath, final String from,
       final String to) throws IOException {
     // Find the source root directory path where the snapshots were taken.
     // All the path checks are included in the valueOf method.
-    final INodeDirectory snapshotRoot = getSnapshottableRoot(iip);
+    INodeDirectory snapshotRootDir;
+    if (this.snapshotDiffAllowSnapRootDescendant) {
+      snapshotRootDir = getSnapshottableAncestorDir(iip);
+    } else {
+      snapshotRootDir = getSnapshottableRoot(iip);
+    }
+    Preconditions.checkNotNull(snapshotRootDir);
+    INodeDirectory snapshotDescendantDir = INodeDirectory.valueOf(
+        iip.getLastINode(), snapshotPath);
 
     if ((from == null || from.isEmpty())
         && (to == null || to.isEmpty())) {
       // both fromSnapshot and toSnapshot indicate the current tree
-      return new SnapshotDiffReport(snapshotRootPath, from, to,
+      return new SnapshotDiffReport(snapshotPath, from, to,
           Collections.<DiffReportEntry> emptyList());
     }
-    final SnapshotDiffInfo diffs = snapshotRoot
-        .getDirectorySnapshottableFeature().computeDiff(snapshotRoot, from, to);
+    final SnapshotDiffInfo diffs = snapshotRootDir
+        .getDirectorySnapshottableFeature().computeDiff(
+            snapshotRootDir, snapshotDescendantDir, from, to);
     return diffs != null ? diffs.generateReport() : new SnapshotDiffReport(
-        snapshotRootPath, from, to, Collections.<DiffReportEntry> emptyList());
+        snapshotPath, from, to, Collections.<DiffReportEntry> emptyList());
   }
   
   public void clearSnapshottableDirs() {

+ 7 - 23
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/top/metrics/TopMetrics.java

@@ -70,14 +70,6 @@ public class TopMetrics implements MetricsSource {
   public static final Logger LOG = LoggerFactory.getLogger(TopMetrics.class);
   public static final String TOPMETRICS_METRICS_SOURCE_NAME =
       "NNTopUserOpCounts";
-  /**
-   * In addition to counts of different RPC calls, NNTop also reports top
-   * users listing large directories (measured by the number of files involved
-   * in listing operations from the user). This is important because the CPU
-   * and GC overhead of a listing operation grows linearly with the number of
-   * files involved. This category in NNTop is {@link #FILES_IN_GETLISTING}.
-   */
-  public static final String FILES_IN_GETLISTING = "filesInGetListing";
   private final boolean isMetricsSourceEnabled;
 
   private static void logConf(Configuration conf) {
@@ -131,30 +123,22 @@ public class TopMetrics implements MetricsSource {
   public void report(boolean succeeded, String userName, InetAddress addr,
       String cmd, String src, String dst, FileStatus status) {
     // currently nntop only makes use of the username and the command
-    report(userName, cmd, 1);
+    report(userName, cmd);
   }
 
-  public void reportFilesInGetListing(String userName, int numFiles) {
-    report(userName, FILES_IN_GETLISTING, numFiles);
-  }
-
-  public void report(String userName, String cmd, int delta) {
+  public void report(String userName, String cmd) {
     long currTime = Time.monotonicNow();
-    report(currTime, userName, cmd, delta);
+    report(currTime, userName, cmd);
   }
 
-  public void report(long currTime, String userName, String cmd, int delta) {
+  public void report(long currTime, String userName, String cmd) {
     LOG.debug("a metric is reported: cmd: {} user: {}", cmd, userName);
     userName = UserGroupInformation.trimLoginMethod(userName);
     for (RollingWindowManager rollingWindowManager : rollingWindowManagers
         .values()) {
-      rollingWindowManager.recordMetric(currTime, cmd, userName, delta);
-      // Increase the number of all RPC calls by the user, unless the report
-      // is for the number of files in a listing operation.
-      if (!cmd.equals(FILES_IN_GETLISTING)) {
-        rollingWindowManager.recordMetric(currTime,
-            TopConf.ALL_CMDS, userName, delta);
-      }
+      rollingWindowManager.recordMetric(currTime, cmd, userName, 1);
+      rollingWindowManager.recordMetric(currTime,
+          TopConf.ALL_CMDS, userName, 1);
     }
   }
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java

@@ -33,11 +33,11 @@ import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.TreeSet;
 import java.util.concurrent.TimeUnit;
 
 import com.google.common.base.Joiner;
-import com.google.common.base.Optional;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -4298,6 +4298,17 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.snapshotdiff.allow.snap-root-descendant</name>
+  <value>true</value>
+  <description>
+    If enabled, the snapshotDiff command can be run for any descendant
+    directory under a snapshot root directory, and the diff calculation will
+    be scoped to the given descendant directory. Otherwise, the snapshotDiff
+    command can only be run on a snapshot root directory.
+  </description>
+</property>
+
 <property>
   <name>dfs.pipeline.ecn</name>
   <value>false</value>
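
With the new property above left at its default (true), a client can request a snapshot diff scoped to a descendant directory directly. A minimal sketch, assuming a hypothetical snapshot root /data with snapshots s1 and s2 and a descendant directory /data/logs (paths and fs.defaultFS are illustrative, not part of the patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.DistributedFileSystem;
    import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;

    public class DescendantSnapshotDiffSketch {
      public static void main(String[] args) throws Exception {
        // Assumes fs.defaultFS points at the HDFS cluster.
        Configuration conf = new Configuration();
        DistributedFileSystem dfs =
            (DistributedFileSystem) FileSystem.get(conf);
        // /data is the snapshot root; the report is computed only for
        // /data/logs, and its paths are relative to /data/logs.
        SnapshotDiffReport report =
            dfs.getSnapshotDiffReport(new Path("/data/logs"), "s1", "s2");
        System.out.println(report);
      }
    }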

+ 3 - 8
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestTopMetrics.java

@@ -26,7 +26,6 @@ import org.apache.hadoop.metrics2.lib.Interns;
 import org.junit.Test;
 
 import static org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics.TOPMETRICS_METRICS_SOURCE_NAME;
-import static org.apache.hadoop.hdfs.server.namenode.top.metrics.TopMetrics.FILES_IN_GETLISTING;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -42,10 +41,9 @@ public class TestTopMetrics {
     TopMetrics topMetrics = new TopMetrics(conf,
         topConf.nntopReportingPeriodsMs);
     // Dummy command
-    topMetrics.report("test", "listStatus", 1);
-    topMetrics.report("test", "listStatus", 1);
-    topMetrics.report("test", "listStatus", 1);
-    topMetrics.report("test", FILES_IN_GETLISTING, 1000);
+    topMetrics.report("test", "listStatus");
+    topMetrics.report("test", "listStatus");
+    topMetrics.report("test", "listStatus");
 
     MetricsRecordBuilder rb = getMetrics(topMetrics);
     MetricsCollector mc = rb.parent();
@@ -61,8 +59,5 @@ public class TestTopMetrics {
 
     verify(rb, times(3)).addCounter(Interns.info("op=listStatus." +
         "user=test.count", "Total operations performed by user"), 3L);
-
-    verify(rb, times(3)).addCounter(Interns.info("op=" + FILES_IN_GETLISTING +
-        ".user=test.count", "Total operations performed by user"), 1000L);
   }
 }

+ 80 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapRootDescendantDiff.java

@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import static org.junit.Assert.fail;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test snapshot diff report for the snapshot root descendant directory.
+ */
+public class TestSnapRootDescendantDiff extends TestSnapshotDiffReport {
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_CAPTURE_OPENFILES, true);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
+        true);
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
+        false);
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
+        .format(true).build();
+    cluster.waitActive();
+    hdfs = cluster.getFileSystem();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  @Test
+  public void testNonSnapRootDiffReport() throws Exception {
+    Path subsub1 = new Path(getSnapRootDir(), "subsub1");
+    Path subsubsub1 = new Path(subsub1, "subsubsub1");
+    hdfs.mkdirs(subsubsub1);
+    modifyAndCreateSnapshot(getSnapRootDir(), new Path[]{getSnapRootDir()});
+    modifyAndCreateSnapshot(subsubsub1, new Path[]{getSnapRootDir()});
+
+    try {
+      hdfs.getSnapshotDiffReport(subsub1, "s1", "s2");
+      fail("Expect exception when getting snapshot diff report: " + subsub1
+          + " is not a snapshottable directory.");
+    } catch (IOException e) {
+      GenericTestUtils.assertExceptionContains(
+          "Directory is not a snapshottable directory: " + subsub1, e);
+    }
+  }
+
+}

+ 503 - 38
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDiffReport.java

@@ -43,12 +43,15 @@ import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
+import org.apache.hadoop.hdfs.protocol.SnapshotException;
+import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Time;
 import org.junit.After;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -60,7 +63,6 @@ import org.slf4j.LoggerFactory;
 public class TestSnapshotDiffReport {
   private static final Logger LOG =
       LoggerFactory.getLogger(TestSnapshotDiffReport.class);
-
   private static final long SEED = 0;
   private static final short REPLICATION = 3;
   private static final short REPLICATION_1 = 2;
@@ -74,7 +76,6 @@ public class TestSnapshotDiffReport {
   protected Configuration conf;
   protected MiniDFSCluster cluster;
   protected DistributedFileSystem hdfs;
-  
   private final HashMap<Path, Integer> snapshotNumberMap = new HashMap<Path, Integer>();
 
   @Before
@@ -86,6 +87,9 @@ public class TestSnapshotDiffReport {
     conf.setBoolean(
         DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_SKIP_CAPTURE_ACCESSTIME_ONLY_CHANGE,
         true);
+    conf.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
+        true);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
         .format(true).build();
     cluster.waitActive();
@@ -99,7 +103,11 @@ public class TestSnapshotDiffReport {
       cluster = null;
     }
   }
-  
+
+  protected Path getSnapRootDir() {
+    return sub1;
+  }
+
   private String genSnapshotName(Path snapshotDir) {
     int sNum = -1;
     if (snapshotNumberMap.containsKey(snapshotDir)) {
@@ -108,12 +116,12 @@ public class TestSnapshotDiffReport {
     snapshotNumberMap.put(snapshotDir, ++sNum);
     return "s" + sNum;
   }
-  
+
   /**
    * Create/modify/delete files under a given directory, also create snapshots
    * of directories.
-   */ 
-  private void modifyAndCreateSnapshot(Path modifyDir, Path[] snapshotDirs)
+   */
+  protected void modifyAndCreateSnapshot(Path modifyDir, Path[] snapshotDirs)
       throws Exception {
     Path file10 = new Path(modifyDir, "file10");
     Path file11 = new Path(modifyDir, "file11");
@@ -133,7 +141,7 @@ public class TestSnapshotDiffReport {
       hdfs.allowSnapshot(snapshotDir);
       hdfs.createSnapshot(snapshotDir, genSnapshotName(snapshotDir));
     }
-    
+
     // delete file11
     hdfs.delete(file11, true);
     // modify file12
@@ -146,12 +154,12 @@ public class TestSnapshotDiffReport {
     DFSTestUtil.createFile(hdfs, file14, BLOCKSIZE, REPLICATION, SEED);
     // create file15
     DFSTestUtil.createFile(hdfs, file15, BLOCKSIZE, REPLICATION, SEED);
-    
+
     // create snapshot
     for (Path snapshotDir : snapshotDirs) {
       hdfs.createSnapshot(snapshotDir, genSnapshotName(snapshotDir));
     }
-    
+
     // create file11 again
     DFSTestUtil.createFile(hdfs, file11, BLOCKSIZE, REPLICATION, SEED);
     // delete file12
@@ -164,7 +172,7 @@ public class TestSnapshotDiffReport {
     hdfs.delete(file14, true);
     // modify file15
     hdfs.setReplication(file15, (short) (REPLICATION - 1));
-    
+
     // create snapshot
     for (Path snapshotDir : snapshotDirs) {
       hdfs.createSnapshot(snapshotDir, genSnapshotName(snapshotDir));
@@ -172,8 +180,10 @@ public class TestSnapshotDiffReport {
     // modify file10
     hdfs.setReplication(file10, (short) (REPLICATION + 1));
   }
-  
-  /** check the correctness of the diff reports */
+
+  /**
+   * Check the correctness of the diff reports.
+   */
   private void verifyDiffReport(Path dir, String from, String to,
       DiffReportEntry... entries) throws IOException {
     SnapshotDiffReport report = hdfs.getSnapshotDiffReport(dir, from, to);
@@ -182,10 +192,10 @@ public class TestSnapshotDiffReport {
         .getSnapshotDiffReport(dir, to, from);
     LOG.info(report.toString());
     LOG.info(inverseReport.toString() + "\n");
-    
+
     assertEquals(entries.length, report.getDiffList().size());
     assertEquals(entries.length, inverseReport.getDiffList().size());
-    
+
     for (DiffReportEntry entry : entries) {
       if (entry.getType() == DiffType.MODIFY) {
         assertTrue(report.getDiffList().contains(entry));
@@ -201,9 +211,11 @@ public class TestSnapshotDiffReport {
       }
     }
   }
-  
-  /** Test the computation and representation of diff between snapshots */
-  @Test (timeout=60000)
+
+  /**
+   * Test the computation and representation of diff between snapshots.
+   */
+  @Test(timeout = 60000)
   public void testDiffReport() throws Exception {
     cluster.getNamesystem().getSnapshotManager().setAllowNestedSnapshots(true);
 
@@ -212,45 +224,38 @@ public class TestSnapshotDiffReport {
     hdfs.mkdirs(subsubsub1);
     modifyAndCreateSnapshot(sub1, new Path[]{sub1, subsubsub1});
     modifyAndCreateSnapshot(subsubsub1, new Path[]{sub1, subsubsub1});
-    
-    try {
-      hdfs.getSnapshotDiffReport(subsub1, "s1", "s2");
-      fail("Expect exception when getting snapshot diff report: " + subsub1
-          + " is not a snapshottable directory.");
-    } catch (IOException e) {
-      GenericTestUtils.assertExceptionContains(
-          "Directory is not a snapshottable directory: " + subsub1, e);
-    }
-    
+
     final String invalidName = "invalid";
     try {
       hdfs.getSnapshotDiffReport(sub1, invalidName, invalidName);
-      fail("Expect exception when providing invalid snapshot name for diff report");
+      fail("Expect exception when providing invalid snapshot name " +
+          "for diff report");
     } catch (IOException e) {
       GenericTestUtils.assertExceptionContains(
           "Cannot find the snapshot of directory " + sub1 + " with name "
               + invalidName, e);
     }
-    
+
     // diff between the same snapshot
     SnapshotDiffReport report = hdfs.getSnapshotDiffReport(sub1, "s0", "s0");
     LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
-    
+
     report = hdfs.getSnapshotDiffReport(sub1, "", "");
     LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
-    
+
     report = hdfs.getSnapshotDiffReport(subsubsub1, "s0", "s2");
     LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
 
     // test path with scheme also works
-    report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1), "s0", "s2");
+    report = hdfs.getSnapshotDiffReport(hdfs.makeQualified(subsubsub1),
+        "s0", "s2");
     LOG.info(report.toString());
     assertEquals(0, report.getDiffList().size());
 
-    verifyDiffReport(sub1, "s0", "s2", 
+    verifyDiffReport(sub1, "s0", "s2",
         new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
         new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("file15")),
         new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("file12")),
@@ -260,7 +265,7 @@ public class TestSnapshotDiffReport {
         new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("link13")),
         new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("link13")));
 
-    verifyDiffReport(sub1, "s0", "s5", 
+    verifyDiffReport(sub1, "s0", "s5",
         new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
         new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("file15")),
         new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("file12")),
@@ -282,7 +287,7 @@ public class TestSnapshotDiffReport {
             DFSUtil.string2Bytes("subsub1/subsubsub1/link13")),
         new DiffReportEntry(DiffType.CREATE,
             DFSUtil.string2Bytes("subsub1/subsubsub1/file15")));
-    
+
     verifyDiffReport(sub1, "s2", "s5",
         new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("file10")),
         new DiffReportEntry(DiffType.MODIFY,
@@ -297,7 +302,7 @@ public class TestSnapshotDiffReport {
             DFSUtil.string2Bytes("subsub1/subsubsub1/link13")),
         new DiffReportEntry(DiffType.CREATE,
             DFSUtil.string2Bytes("subsub1/subsubsub1/file15")));
-    
+
     verifyDiffReport(sub1, "s3", "",
         new DiffReportEntry(DiffType.MODIFY,
             DFSUtil.string2Bytes("subsub1/subsubsub1")),
@@ -318,7 +323,467 @@ public class TestSnapshotDiffReport {
         new DiffReportEntry(DiffType.DELETE,
             DFSUtil.string2Bytes("subsub1/subsubsub1/link13")));
   }
-  
+
+  @Test(timeout = 60000)
+  public void testSnapRootDescendantDiffReport() throws Exception {
+    Assume.assumeTrue(conf.getBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
+        DFSConfigKeys.
+            DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT_DEFAULT));
+    Path subSub = new Path(sub1, "subsub1");
+    Path subSubSub = new Path(subSub, "subsubsub1");
+    Path nonSnapDir = new Path(dir, "non_snap");
+    hdfs.mkdirs(subSubSub);
+    hdfs.mkdirs(nonSnapDir);
+
+    modifyAndCreateSnapshot(sub1, new Path[]{sub1});
+    modifyAndCreateSnapshot(subSub, new Path[]{sub1});
+    modifyAndCreateSnapshot(subSubSub, new Path[]{sub1});
+
+    try {
+      hdfs.getSnapshotDiffReport(subSub, "s1", "s2");
+      hdfs.getSnapshotDiffReport(subSubSub, "s1", "s2");
+    } catch (IOException e) {
+      fail("Unexpected exception when getting snapshot diff report " +
+          subSub + ": " + e);
+    }
+
+    try {
+      hdfs.getSnapshotDiffReport(nonSnapDir, "s1", "s2");
+      fail("Snapshot diff report on a non snapshot directory '"
+          + nonSnapDir.getName() + "' should fail!");
+    } catch (SnapshotException e) {
+      GenericTestUtils.assertExceptionContains(
+          "Directory is neither snapshottable nor under a snap root!", e);
+    }
+
+    final String invalidName = "invalid";
+    try {
+      hdfs.getSnapshotDiffReport(subSub, invalidName, invalidName);
+      fail("Expect exception when providing invalid snapshot name " +
+          "for diff report");
+    } catch (IOException e) {
+      GenericTestUtils.assertExceptionContains(
+          "Cannot find the snapshot of directory " + sub1 + " with name "
+              + invalidName, e);
+    }
+
+    // diff between the same snapshot
+    SnapshotDiffReport report = hdfs.getSnapshotDiffReport(subSub, "s0", "s0");
+    assertEquals(0, report.getDiffList().size());
+
+    report = hdfs.getSnapshotDiffReport(subSub, "", "");
+    assertEquals(0, report.getDiffList().size());
+
+    report = hdfs.getSnapshotDiffReport(subSubSub, "s0", "s2");
+    assertEquals(0, report.getDiffList().size());
+
+    report = hdfs.getSnapshotDiffReport(
+        hdfs.makeQualified(subSubSub), "s0", "s2");
+    assertEquals(0, report.getDiffList().size());
+
+    verifyDescendantDiffReports(sub1, subSub, subSubSub);
+  }
+
+  private void verifyDescendantDiffReports(final Path snapDir,
+      final Path snapSubDir, final Path snapSubSubDir) throws
+      IOException {
+    verifyDiffReport(snapDir, "s0", "s2",
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("file15")),
+        new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("file12")),
+        new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("link13")));
+    verifyDiffReport(snapSubDir, "s0", "s2", new DiffReportEntry[]{});
+    verifyDiffReport(snapSubSubDir, "s0", "s2", new DiffReportEntry[]{});
+
+    verifyDiffReport(snapDir, "s0", "s8",
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("file15")),
+        new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("file12")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.DELETE, DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.CREATE, DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file15")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1/subsubsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file15")));
+
+    verifyDiffReport(snapSubDir, "s0", "s8",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file15")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsubsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file15")));
+
+    verifyDiffReport(snapSubSubDir, "s0", "s8",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file15")));
+
+    verifyDiffReport(snapDir, "s2", "s5",
+        new DiffReportEntry(DiffType.MODIFY, DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file15")));
+
+    verifyDiffReport(snapSubDir, "s2", "s5",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file15")));
+    verifyDiffReport(snapSubSubDir, "s2", "s5",
+        new DiffReportEntry[]{});
+
+    verifyDiffReport(snapDir, "s3", "",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file15")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("subsub1/file12")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1/file10")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("subsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/file11")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/link13")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("subsub1/link13")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1/subsubsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file15")));
+
+    verifyDiffReport(snapSubDir, "s3", "",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file15")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("file12")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsubsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsubsub1/file15")));
+
+    verifyDiffReport(snapSubSubDir, "s3", "",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file10")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file11")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("link13")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file15")));
+  }
+
+  @Test
+  public void testSnapRootDescendantDiffReportWithRename() throws Exception {
+    Assume.assumeTrue(conf.getBoolean(
+        DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT,
+        DFSConfigKeys.
+            DFS_NAMENODE_SNAPSHOT_DIFF_ALLOW_SNAP_ROOT_DESCENDANT_DEFAULT));
+    Path subSub = new Path(sub1, "subsub1");
+    Path subSubSub = new Path(subSub, "subsubsub1");
+    Path nonSnapDir = new Path(dir, "non_snap");
+    hdfs.mkdirs(subSubSub);
+    hdfs.mkdirs(nonSnapDir);
+
+    hdfs.allowSnapshot(sub1);
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+    Path file20 = new Path(subSubSub, "file20");
+    DFSTestUtil.createFile(hdfs, file20, BLOCKSIZE, REPLICATION_1, SEED);
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+
+    // Case 1: Move a file away from a descendant dir, but within the snap root.
+    // mv <snaproot>/<subsub>/<subsubsub>/file20 <snaproot>/<subsub>/file20
+    hdfs.rename(file20, new Path(subSub, file20.getName()));
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+
+    // The snapshot diff for the snap root detects the change as a rename,
+    // since the file move happened within the snap root.
+    verifyDiffReport(sub1, "s1", "s2",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1/subsubsub1")),
+        new DiffReportEntry(DiffType.RENAME,
+            DFSUtil.string2Bytes("subsub1/subsubsub1/file20"),
+            DFSUtil.string2Bytes("subsub1/file20")));
+
+    // The snapshot diff for the descendant dir <subsub> still detects the
+    // change as a rename, since the file move happened under the snap root
+    // descendant dir.
+    verifyDiffReport(subSub, "s1", "s2",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsubsub1")),
+        new DiffReportEntry(DiffType.RENAME,
+            DFSUtil.string2Bytes("subsubsub1/file20"),
+            DFSUtil.string2Bytes("file20")));
+
+    // The snapshot diff for the descendant dir <subsubsub> detects the
+    // change as a delete, since the file was moved out of its scope.
+    verifyDiffReport(subSubSub, "s1", "s2",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("file20")));
+
+    // Case 2: Move the file from the snap root descendant dir to any
+    // non snap root dir. mv <snaproot>/<subsub>/file20 <nonsnaproot>/file20.
+    hdfs.rename(new Path(subSub, file20.getName()),
+        new Path(dir, file20.getName()));
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+
+    // The snapshot diff for the snap root detects the change as a delete,
+    // since the file was moved out of the snap root dir to a non snap
+    // root dir.
+    verifyDiffReport(sub1, "s2", "s3",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("subsub1/file20")));
+
+    // The snapshot diff for the snap root descendant <subsub> detects the
+    // change as a delete, since the file was previously under its scope and
+    // was moved out of its scope.
+    verifyDiffReport(subSub, "s2", "s3",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.DELETE,
+            DFSUtil.string2Bytes("file20")));
+
+    // The file was already outside the scope of the descendant dir
+    // <subsubsub>, so the snapshot diff report for that dir does not
+    // show the rename at all.
+    verifyDiffReport(subSubSub, "s2", "s3",
+        new DiffReportEntry[]{});
+
+    // Case 3: Move the file from the non-snap root dir to snap root dir
+    // mv <nonsnaproot>/file20 <snaproot>/file20
+    hdfs.rename(new Path(dir, file20.getName()),
+        new Path(sub1, file20.getName()));
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+
+    // Snap root directory should show the file moved in as a new file.
+    verifyDiffReport(sub1, "s3", "s4",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file20")));
+
+    // Snap descendant directories don't have visibility to the moved in file.
+    verifyDiffReport(subSub, "s3", "s4",
+        new DiffReportEntry[]{});
+    verifyDiffReport(subSubSub, "s3", "s4",
+        new DiffReportEntry[]{});
+
+    hdfs.rename(new Path(sub1, file20.getName()),
+        new Path(subSub, file20.getName()));
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+
+    // Snap root directory now shows the rename as both source and
+    // destination paths are under the snap root.
+    verifyDiffReport(sub1, "s4", "s5",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.RENAME,
+            DFSUtil.string2Bytes("file20"),
+            DFSUtil.string2Bytes("subsub1/file20")),
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")));
+
+    // For the descendant directory under the snap root, the file
+    // moved in shows up as a new file created.
+    verifyDiffReport(subSub, "s4", "s5",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("file20")));
+
+    verifyDiffReport(subSubSub, "s4", "s5",
+        new DiffReportEntry[]{});
+
+    // Case 4: Snapshot diff for the newly created descendant directory.
+    Path subSubSub2 = new Path(subSub, "subsubsub2");
+    hdfs.mkdirs(subSubSub2);
+    Path file30 = new Path(subSubSub2, "file30");
+    DFSTestUtil.createFile(hdfs, file30, BLOCKSIZE, REPLICATION_1, SEED);
+    hdfs.createFile(file30);
+    hdfs.createSnapshot(sub1, genSnapshotName(sub1));
+
+    verifyDiffReport(sub1, "s5", "s6",
+        new DiffReportEntry(DiffType.MODIFY,
+            DFSUtil.string2Bytes("subsub1")),
+        new DiffReportEntry(DiffType.CREATE,
+            DFSUtil.string2Bytes("subsub1/subsubsub2")));
+
+    verifyDiffReport(subSubSub2, "s5", "s6",
+        new DiffReportEntry[]{});
+
+    verifyDiffReport(subSubSub2, "s1", "s2",
+        new DiffReportEntry[]{});
+  }
+
+  @Test
+  public void testSnapshotDiffInfo() throws Exception {
+    Path snapshotRootDirPath = dir;
+    Path snapshotDirDescendantPath = new Path(snapshotRootDirPath, "desc");
+    Path snapshotDirNonDescendantPath = new Path("/dummy/non/snap/desc");
+    hdfs.mkdirs(snapshotDirDescendantPath);
+    hdfs.mkdirs(snapshotDirNonDescendantPath);
+
+    hdfs.allowSnapshot(snapshotRootDirPath);
+    hdfs.createSnapshot(snapshotRootDirPath, "s0");
+    hdfs.createSnapshot(snapshotRootDirPath, "s1");
+
+    INodeDirectory snapshotRootDir = cluster.getNameNode()
+        .getNamesystem().getFSDirectory().getINode(
+            snapshotRootDirPath.toUri().getPath())
+        .asDirectory();
+    INodeDirectory snapshotRootDescendantDir = cluster.getNameNode()
+        .getNamesystem().getFSDirectory().getINode(
+            snapshotDirDescendantPath.toUri().getPath())
+        .asDirectory();
+    INodeDirectory snapshotRootNonDescendantDir = cluster.getNameNode()
+        .getNamesystem().getFSDirectory().getINode(
+            snapshotDirNonDescendantPath.toUri().getPath())
+        .asDirectory();
+    try {
+      SnapshotDiffInfo sdi = new SnapshotDiffInfo(
+          snapshotRootDir,
+          snapshotRootDescendantDir,
+          new Snapshot(0, "s0", snapshotRootDescendantDir),
+          new Snapshot(0, "s1", snapshotRootDescendantDir));
+      LOG.info("SnapshotDiffInfo: " + sdi.getFrom() + " - " + sdi.getTo());
+    } catch (IllegalArgumentException iae){
+      fail("Unexpected exception when constructing SnapshotDiffInfo: " + iae);
+    }
+
+    try {
+      SnapshotDiffInfo sdi = new SnapshotDiffInfo(
+          snapshotRootDir,
+          snapshotRootNonDescendantDir,
+          new Snapshot(0, "s0", snapshotRootNonDescendantDir),
+          new Snapshot(0, "s1", snapshotRootNonDescendantDir));
+      LOG.info("SnapshotDiffInfo: " + sdi.getFrom() + " - " + sdi.getTo());
+      fail("SnapshotDiffInfo construction should fail for non snapshot root " +
+          "or non snapshot root descendant directories!");
+    } catch (IllegalArgumentException iae) {
+      // expected exception
+    }
+  }
+
   /**
    * Make changes under a sub-directory, then delete the sub-directory. Make
    * sure the diff report computation correctly retrieve the diff from the
@@ -828,4 +1293,4 @@ public class TestSnapshotDiffReport {
 
     assertAtimeNotEquals(filePostSS, root, "s2", "s3");
   }
-}
+}

The file diff has been suppressed because it is too large
+ 11 - 0
hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_2.8.2.xml


The file diff has been suppressed because it is too large
+ 11 - 0
hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_2.8.2.xml


The file diff has been suppressed because it is too large
+ 11 - 0
hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_2.8.2.xml


+ 5 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java

@@ -39,8 +39,6 @@ import java.util.concurrent.Future;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.atomic.AtomicLong;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.LocalDirAllocator;
@@ -59,14 +57,16 @@ import org.apache.hadoop.yarn.util.FSDownload;
 
 import com.google.common.collect.Maps;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A helper class for managing the distributed cache for {@link LocalJobRunner}.
  */
 @SuppressWarnings("deprecation")
 class LocalDistributedCacheManager {
-  public static final Log LOG =
-    LogFactory.getLog(LocalDistributedCacheManager.class);
+  public static final Logger LOG =
+      LoggerFactory.getLogger(LocalDistributedCacheManager.class);
   
   private List<String> localArchives = new ArrayList<String>();
   private List<String> localFiles = new ArrayList<String>();
@@ -230,7 +230,7 @@ class LocalDistributedCacheManager {
     final URL[] urls = new URL[localClasspaths.size()];
     for (int i = 0; i < localClasspaths.size(); ++i) {
       urls[i] = new File(localClasspaths.get(i)).toURI().toURL();
-      LOG.info(urls[i]);
+      LOG.info(urls[i].toString());
     }
     return AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
       @Override

+ 8 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java

@@ -38,8 +38,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 import javax.crypto.KeyGenerator;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -74,13 +72,15 @@ import org.apache.hadoop.util.ReflectionUtils;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.util.concurrent.HadoopExecutors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /** Implements MapReduce locally, in-process, for debugging. */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 public class LocalJobRunner implements ClientProtocol {
-  public static final Log LOG =
-    LogFactory.getLog(LocalJobRunner.class);
+  public static final Logger LOG =
+      LoggerFactory.getLogger(LocalJobRunner.class);
 
   /** The maximum number of map tasks to run in parallel in LocalJobRunner */
   public static final String LOCAL_MAX_MAPS =
@@ -587,7 +587,7 @@ public class LocalJobRunner implements ClientProtocol {
         } else {
           this.status.setRunState(JobStatus.FAILED);
         }
-        LOG.warn(id, t);
+        LOG.warn(id.toString(), t);
 
         JobEndNotifier.localRunnerNotification(job, status);
 
@@ -721,17 +721,17 @@ public class LocalJobRunner implements ClientProtocol {
     @Override
     public synchronized void fsError(TaskAttemptID taskId, String message) 
     throws IOException {
-      LOG.fatal("FSError: "+ message + "from task: " + taskId);
+      LOG.error("FSError: "+ message + "from task: " + taskId);
     }
 
     @Override
     public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
-      LOG.fatal("shuffleError: "+ message + "from task: " + taskId);
+      LOG.error("shuffleError: "+ message + "from task: " + taskId);
     }
     
     public synchronized void fatalError(TaskAttemptID taskId, String msg) 
     throws IOException {
-      LOG.fatal("Fatal: "+ msg + "from task: " + taskId);
+      LOG.error("Fatal: "+ msg + "from task: " + taskId);
     }
     
     @Override

+ 5 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java

@@ -24,20 +24,21 @@ import java.net.URLDecoder;
 import java.net.URLEncoder;
 import static java.nio.charset.StandardCharsets.UTF_8;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 public class FileNameIndexUtils {
 
   // Sanitize job history file for predictable parsing
   static final String DELIMITER = "-";
   static final String DELIMITER_ESCAPE = "%2D";
 
-  private static final Log LOG = LogFactory.getLog(FileNameIndexUtils.class);
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FileNameIndexUtils.class);
 
   // Job history file names need to be backwards compatible
   // Only append new elements to the end of this list

+ 4 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java

@@ -28,8 +28,6 @@ import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -48,6 +46,8 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.util.MRApps;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -121,7 +121,8 @@ public class JobHistoryUtils {
   public static final String TIMESTAMP_DIR_REGEX = "\\d{4}" + "\\" + Path.SEPARATOR +  "\\d{2}" + "\\" + Path.SEPARATOR + "\\d{2}";
   public static final Pattern TIMESTAMP_DIR_PATTERN = Pattern.compile(TIMESTAMP_DIR_REGEX);
   private static final String TIMESTAMP_DIR_FORMAT = "%04d" + File.separator + "%02d" + File.separator + "%02d";
-  private static final Log LOG = LogFactory.getLog(JobHistoryUtils.class);
+  private static final Logger LOG =
+      LoggerFactory.getLogger(JobHistoryUtils.class);
 
   private static final PathFilter CONF_FILTER = new PathFilter() {
     @Override

+ 4 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/security/MRDelegationTokenRenewer.java

@@ -21,8 +21,6 @@ import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.security.PrivilegedAction;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
@@ -38,12 +36,14 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenRenewer;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.util.Records;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @InterfaceAudience.Private
 public class MRDelegationTokenRenewer extends TokenRenewer {
 
-  private static final Log LOG = LogFactory
-      .getLog(MRDelegationTokenRenewer.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(MRDelegationTokenRenewer.class);
 
   @Override
   public boolean handleKind(Text kind) {

+ 4 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/security/client/ClientHSTokenSelector.java

@@ -20,19 +20,19 @@ package org.apache.hadoop.mapreduce.v2.security.client;
 
 import java.util.Collection;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.security.token.TokenSelector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class ClientHSTokenSelector implements
     TokenSelector<MRDelegationTokenIdentifier> {
 
-  private static final Log LOG = LogFactory
-      .getLog(ClientHSTokenSelector.class);
+  private static final Logger LOG = LoggerFactory
+      .getLogger(ClientHSTokenSelector.class);
 
   @SuppressWarnings("unchecked")
   public Token<MRDelegationTokenIdentifier> selectToken(Text service,

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java

@@ -34,8 +34,6 @@ import java.util.Map;
 
 import com.google.common.annotations.VisibleForTesting;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
@@ -70,6 +68,8 @@ import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.util.Apps;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Helper class for MR applications
@@ -77,7 +77,7 @@ import org.apache.hadoop.yarn.util.Apps;
 @Private
 @Unstable
 public class MRApps extends Apps {
-  public static final Log LOG = LogFactory.getLog(MRApps.class);
+  public static final Logger LOG = LoggerFactory.getLogger(MRApps.class);
 
   public static String toString(JobId jid) {
     return jid.toString();

+ 4 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalModeWithNewApis.java

@@ -27,8 +27,6 @@ import java.io.InputStreamReader;
 import java.util.Random;
 import java.util.StringTokenizer;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
@@ -44,11 +42,13 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class TestLocalModeWithNewApis {
 
-  public static final Log LOG = 
-      LogFactory.getLog(TestLocalModeWithNewApis.class);
+  public static final Logger LOG =
+      LoggerFactory.getLogger(TestLocalModeWithNewApis.class);
   
   Configuration conf;
   

+ 4 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java

@@ -31,8 +31,6 @@ import org.junit.Assert;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -50,6 +48,8 @@ import org.apache.hadoop.mapreduce.TaskInputOutputContext;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
 import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 /**
  * Tests the use of the
  * {@link org.apache.hadoop.mapreduce.filecache.DistributedCache} within the
@@ -81,8 +81,8 @@ public class TestMRWithDistributedCache {
     }
   }
 
-  private static final Log LOG =
-    LogFactory.getLog(TestMRWithDistributedCache.class);
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestMRWithDistributedCache.class);
   
   private static class DistributedCacheChecker {
 

+ 1 - 1
hadoop-project-dist/pom.xml

@@ -145,7 +145,7 @@
         <activeByDefault>false</activeByDefault>
       </activation>
       <properties>
-        <jdiff.stable.api>2.8.0</jdiff.stable.api>
+        <jdiff.stable.api>2.8.2</jdiff.stable.api>
         <jdiff.stability>-unstable</jdiff.stability>
         <!-- Commented out for HADOOP-11776 -->
         <!-- Uncomment param name="${jdiff.compatibility}" in javadoc doclet if compatibility is not empty -->

+ 5 - 0
hadoop-project/pom.xml

@@ -674,6 +674,11 @@
         <artifactId>asm</artifactId>
         <version>5.0.4</version>
       </dependency>
+      <dependency>
+          <groupId>org.ojalgo</groupId>
+          <artifactId>ojalgo</artifactId>
+          <version>43.0</version>
+      </dependency>
       <dependency>
         <groupId>com.sun.jersey</groupId>
         <artifactId>jersey-core</artifactId>

+ 2 - 0
hadoop-project/src/site/site.xml

@@ -103,6 +103,7 @@
       <item name="Disk Balancer" href="hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html"/>
       <item name="Upgrade Domain" href="hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html"/>
       <item name="DataNode Admin" href="hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html"/>
+      <item name="Router Federation" href="hadoop-project-dist/hadoop-hdfs/HDFSRouterFederation.html"/>
     </menu>
 
     <menu name="Ozone" inherit="top">
@@ -186,6 +187,7 @@
       <item name="DistCp" href="hadoop-distcp/DistCp.html"/>
       <item name="GridMix" href="hadoop-gridmix/GridMix.html"/>
       <item name="Rumen" href="hadoop-rumen/Rumen.html"/>
+      <item name="Resource Estimator Service" href="hadoop-resourceestimator/ResourceEstimator.html"/>
       <item name="Scheduler Load Simulator" href="hadoop-sls/SchedulerLoadSimulator.html"/>
       <item name="Hadoop Benchmarking" href="hadoop-project-dist/hadoop-common/Benchmarking.html"/>
     </menu>

+ 19 - 0
hadoop-tools/hadoop-resourceestimator/README.md

@@ -0,0 +1,19 @@
+Resource Estimator Service
+==========================
+
+Resource Estimator Service can parse the history logs of production jobs, extract their resource consumption skylines from past runs, and predict their resource requirements for new runs.
+
+## Current Status
+
+  * Supports [Hadoop YARN ResourceManager](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html) logs.
+  * In-memory store for parsed history resource skylines and estimations.
+  * A [Linear Programming](https://github.com/optimatika/ojAlgo) based estimator.
+  * A REST interface to parse logs and to query the history store and estimations.
+
+## Upcoming features
+
+  * UI to query history and to edit and save estimations.
+  * Persistent store implementation (either DB based or a distributed key-value store such as HBase).
+  * Integrate directly with the [Hadoop YARN Reservation System](http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ReservationSystem.html) to make a recurring reservation based on the estimated resources.
+
+Refer to the [design document](https://issues.apache.org/jira/secure/attachment/12886714/ResourceEstimator-design-v1.pdf) for more details.

+ 174 - 0
hadoop-tools/hadoop-resourceestimator/pom.xml

@@ -0,0 +1,174 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  ~
+  -->
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns="http://maven.apache.org/POM/4.0.0"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-project</artifactId>
+        <version>3.1.0-SNAPSHOT</version>
+        <relativePath>../../hadoop-project</relativePath>
+    </parent>
+    <artifactId>hadoop-resourceestimator</artifactId>
+    <name>Apache Resource Estimator Service</name>
+    <packaging>jar</packaging>
+    <dependencies>
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.ojalgo</groupId>
+            <artifactId>ojalgo</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-yarn-common</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-yarn-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>javax.inject</groupId>
+            <artifactId>javax.inject</artifactId>
+            <version>1</version>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.jersey.jersey-test-framework</groupId>
+            <artifactId>jersey-test-framework-grizzly2</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>javax.servlet</groupId>
+            <artifactId>javax.servlet-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-server</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.jersey</groupId>
+            <artifactId>jersey-json</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.rat</groupId>
+                <artifactId>apache-rat-plugin</artifactId>
+                <configuration>
+                    <excludes>
+                        <exclude>README.md</exclude>
+                        <exclude>**/*.txt</exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-checkstyle-plugin</artifactId>
+                <configuration>
+                    <configLocation>src/config/checkstyle.xml</configLocation>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>dist</id>
+            <activation>
+                <activeByDefault>false</activeByDefault>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-assembly-plugin</artifactId>
+                        <configuration>
+                            <appendAssemblyId>false</appendAssemblyId>
+                            <attach>false</attach>
+                            <finalName>
+                                ${project.artifactId}-${project.version}
+                            </finalName>
+                            <descriptorRefs>
+                                <descriptorRef>hadoop-resourceestimator
+                                </descriptorRef>
+                            </descriptorRefs>
+                        </configuration>
+                        <dependencies>
+                            <dependency>
+                                <groupId>org.apache.hadoop</groupId>
+                                <artifactId>hadoop-assemblies</artifactId>
+                                <version>${project.version}</version>
+                            </dependency>
+                        </dependencies>
+                        <executions>
+                            <execution>
+                                <id>dist</id>
+                                <phase>prepare-package</phase>
+                                <goals>
+                                    <goal>single</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-dependency-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>deplist</id>
+                                <phase>compile</phase>
+                                <goals>
+                                    <goal>list</goal>
+                                </goals>
+                                <configuration>
+                                    <outputFile>
+                                        ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt
+                                    </outputFile>
+                                </configuration>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+</project>

+ 50 - 0
hadoop-tools/hadoop-resourceestimator/src/config/checkstyle.xml

@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE module PUBLIC
+        "-//Puppy Crawl//DTD Check Configuration 1.2//EN"
+        "http://www.puppycrawl.com/dtds/configuration_1_2.dtd">
+
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+  Checkstyle configuration that checks the sun coding conventions from:
+
+    - the Java Language Specification at
+      http://java.sun.com/docs/books/jls/second_edition/html/index.html
+
+    - the Sun Code Conventions at http://java.sun.com/docs/codeconv/
+
+    - the Javadoc guidelines at
+      http://java.sun.com/j2se/javadoc/writingdoccomments/index.html
+
+    - the JDK Api documentation http://java.sun.com/j2se/docs/api/index.html
+
+    - some best practices
+
+  Checkstyle is very configurable. Be sure to read the documentation at
+  http://checkstyle.sf.net (or in your downloaded distribution).
+
+  Most Checks are configurable, be sure to consult the documentation.
+  To completely disable a check, just comment it out or delete it from the file.
+  Finally, it is worth reading the documentation.
+-->
+
+<module name="Checker">
+    <module name="TreeWalker">
+        <module name="AvoidNestedBlocks">
+            <property name="id" value="checkstyle:avoidnestedblocks"/>
+            <property name="allowInSwitchCase" value="true"/>
+        </module>
+    </module>
+</module>

+ 52 - 0
hadoop-tools/hadoop-resourceestimator/src/main/bin/estimator.cmd

@@ -0,0 +1,52 @@
+@echo off
+@rem Licensed to the Apache Software Foundation (ASF) under one or more
+@rem contributor license agreements.  See the NOTICE file distributed with
+@rem this work for additional information regarding copyright ownership.
+@rem The ASF licenses this file to You under the Apache License, Version 2.0
+@rem (the "License"); you may not use this file except in compliance with
+@rem the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+setlocal enabledelayedexpansion
+
+if not defined HADOOP_BIN_PATH (
+  set HADOOP_BIN_PATH=%~dp0
+)
+
+if "%HADOOP_BIN_PATH:~-1%" == "\" (
+  set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1%
+)
+
+set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec
+if not defined HADOOP_LIBEXEC_DIR (
+  set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR%
+)
+
+:main
+  @rem CLASSPATH initially contains HADOOP_CONF_DIR
+    if not defined HADOOP_CONF_DIR (
+      echo No HADOOP_CONF_DIR set.
+      echo Please specify it.
+      goto :eof
+    )
+
+  set CLASSPATH=%HADOOP_CONF_DIR%;%CLASSPATH%
+goto :eof
+
+:classpath
+  set CLASS=org.apache.hadoop.util.Classpath
+  goto :eof
+
+:resourceestimator
+  set CLASS=org.apache.hadoop.resourceestimator.service.ResourceEstimatorServer
+  goto :eof
+
+endlocal

+ 71 - 0
hadoop-tools/hadoop-resourceestimator/src/main/bin/estimator.sh

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+#
+
+## @audience     public
+## @stability    stable
+function hadoop_usage()
+{
+  echo "Usage: estimator.sh"
+  # TODO: like hadoop-daemon.sh, support start, stop and status subcommands and running as a background process.
+}
+
+## @audience     public
+## @stability    stable
+function calculate_classpath
+{
+  hadoop_add_client_opts
+  hadoop_add_to_classpath_tools hadoop-resourceestimator
+}
+
+## @audience     public
+## @stability    stable
+function resourceestimatorcmd_case
+{
+  # shellcheck disable=SC2034
+  HADOOP_SUBCMD_SUPPORTDAEMONIZATION="true"
+  # shellcheck disable=SC2034
+  HADOOP_CLASSNAME='org.apache.hadoop.resourceestimator.service.ResourceEstimatorServer'
+}
+
+# let's locate libexec...
+if [[ -n "${HADOOP_HOME}" ]]; then
+  HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../../../../../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  # shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
+
+# get arguments
+HADOOP_SUBCMD=$1
+shift
+
+HADOOP_SUBCMD_ARGS=("$@")
+
+resourceestimatorcmd_case "${HADOOP_SUBCMD}" "${HADOOP_SUBCMD_ARGS[@]}"
+
+calculate_classpath
+hadoop_generic_java_subcmd_handler

+ 37 - 0
hadoop-tools/hadoop-resourceestimator/src/main/bin/start-estimator.cmd

@@ -0,0 +1,37 @@
+@echo off
+@rem Licensed to the Apache Software Foundation (ASF) under one or more
+@rem contributor license agreements.  See the NOTICE file distributed with
+@rem this work for additional information regarding copyright ownership.
+@rem The ASF licenses this file to You under the Apache License, Version 2.0
+@rem (the "License"); you may not use this file except in compliance with
+@rem the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+setlocal enabledelayedexpansion
+
+echo starting resource estimator daemons
+
+if not defined HADOOP_BIN_PATH (
+  set HADOOP_BIN_PATH=%~dp0
+)
+
+if "%HADOOP_BIN_PATH:~-1%" == "\" (
+  set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1%
+)
+
+set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec
+if not defined HADOOP_LIBEXEC_DIR (
+  set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR%
+)
+
+@rem start resource estimator
+start "Resource Estimator Service" estimator resourceestimator
+
+endlocal

+ 42 - 0
hadoop-tools/hadoop-resourceestimator/src/main/bin/start-estimator.sh

@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+#
+
+# let's locate libexec...
+if [[ -n "${HADOOP_HOME}" ]]; then
+  HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../../../../../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  # shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
+
+# start resource estimator
+echo "Starting resource estimator"
+hadoop_uservar_su estimator resourceestimator "bin/estimator.sh" \
+    --config "${HADOOP_CONF_DIR}" \
+    --daemon start \
+    resourceestimator
+(( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? ))

+ 37 - 0
hadoop-tools/hadoop-resourceestimator/src/main/bin/stop-estimator.cmd

@@ -0,0 +1,37 @@
+@echo off
+@rem Licensed to the Apache Software Foundation (ASF) under one or more
+@rem contributor license agreements.  See the NOTICE file distributed with
+@rem this work for additional information regarding copyright ownership.
+@rem The ASF licenses this file to You under the Apache License, Version 2.0
+@rem (the "License"); you may not use this file except in compliance with
+@rem the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+setlocal enabledelayedexpansion
+
+echo stopping resource estimator daemons
+
+if not defined HADOOP_BIN_PATH (
+  set HADOOP_BIN_PATH=%~dp0
+)
+
+if "%HADOOP_BIN_PATH:~-1%" == "\" (
+  set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1%
+)
+
+set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec
+if not defined HADOOP_LIBEXEC_DIR (
+  set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR%
+)
+
+@rem stop resource estimator
+stop "Resource Estimator Service" estimator resourceestimator
+
+endlocal

+ 42 - 0
hadoop-tools/hadoop-resourceestimator/src/main/bin/stop-estimator.sh

@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+#
+
+# let's locate libexec...
+if [[ -n "${HADOOP_HOME}" ]]; then
+  HADOOP_DEFAULT_LIBEXEC_DIR="${HADOOP_HOME}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  HADOOP_DEFAULT_LIBEXEC_DIR="${bin}/../../../../../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$HADOOP_DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  # shellcheck source=./hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
+
+# stop resource estimator
+echo "Stopping resource estimator"
+hadoop_uservar_su estimator resourceestimator "bin/estimator.sh" \
+    --config "${HADOOP_CONF_DIR}" \
+    --daemon stop \
+    resourceestimator
+(( HADOOP_JUMBO_RETCOUNTER=HADOOP_JUMBO_RETCOUNTER + $? ))

+ 85 - 0
hadoop-tools/hadoop-resourceestimator/src/main/conf/resourceestimator-config.xml

@@ -0,0 +1,85 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+        http://www.apache.org/licenses/LICENSE-2.0
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<configuration>
+    <property>
+        <name>resourceestimator.solver.lp.alpha</name>
+        <value>0.1</value>
+        <description>
+            The resource estimator has an integrated Linear Programming solver to make the prediction, and this parameter tunes the tradeoff between resource over-allocation and under-allocation in the Linear Programming model. This parameter varies from 0 to 1, and a larger alpha value means the model minimizes over-allocation better. Default value is 0.1.
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.solver.lp.beta</name>
+        <value>0.1</value>
+        <description>
+            This parameter controls the generalization of the Linear Programming model. It varies from 0 to 1. Default value is 0.1.
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.solver.lp.minJobRuns</name>
+        <value>2</value>
+        <description>
+            The minimum number of job runs required in order to make the prediction. Default value is 2.
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.timeInterval</name>
+        <value>5</value>
+        <description>
+            The time length which is used to discretize job execution into intervals. Note that the estimator makes a resource allocation prediction for each interval. A smaller time interval gives more fine-grained predictions, but also takes more time and space to compute. Default value is 5 (seconds).
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.translator.line-parser</name>
+        <value>org.apache.hadoop.resourceestimator.translator.impl.NativeSingleLineParser</value>
+        <description>
+            The class name of the translator single-line parser, which parses a single line in the log. Default value is org.apache.hadoop.resourceestimator.translator.impl.NativeSingleLineParser, which can parse one line in the sample log. Note that if users want to parse Hadoop Resource Manager logs, they need to set the value to be org.apache.hadoop.resourceestimator.translator.impl.RmSingleLineParser. If they want to implement single-line parser to parse their customized log file, they need to change this value accordingly.
+        </description>
+    </property>
+    <!-- Optional configuration parameters -->
+    <!--
+    <property>
+        <name>resourceestimator.solver.provider</name>
+        <value>org.apache.hadoop.resourceestimator.solver.impl.LpSolver</value>
+        <description>
+            The class name of the solver provider. Default value is org.apache.hadoop.resourceestimator.solver.impl.LpSolver, which incorporates a Linear Programming model to make the prediction. If users want to implement their own models, they need to change this value accordingly.
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.skylinestore.provider</name>
+        <value>org.apache.hadoop.resourceestimator.skylinestore.impl.InMemoryStore</value>
+        <description>
+            The class name of the skylinestore provider. Default value is org.apache.hadoop.resourceestimator.skylinestore.impl.InMemoryStore, which is an in-memory implementation of skylinestore. If users want to use their own skylinestore implementation, they need to change this value accordingly.
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.translator.provider</name>
+        <value>org.apache.hadoop.resourceestimator.translator.impl.BaseLogParser</value>
+        <description>
+            The class name of the translator provider. Default value is org.apache.hadoop.resourceestimator.translator.impl.BaseLogParser, which extracts resourceskylines from log streams. If users want to use their own translator implementation, they need to change this value accordingly.
+        </description>
+    </property>
+    <property>
+        <name>resourceestimator.service-port</name>
+        <value>9998</value>
+        <description>
+            The port which ResourceEstimatorService listens to. The default value is 9998.
+        </description>
+    </property>
+    -->
+</configuration>
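
The properties above are ordinary Hadoop configuration entries. As a minimal sketch (illustration only; the server's actual bootstrap code is not part of this diff), a client could load the file from the classpath and read the Linear Programming tunables documented above, with fallbacks that simply mirror the stated defaults:

```java
import org.apache.hadoop.conf.Configuration;

public class ResourceEstimatorConfigSketch {
  public static void main(String[] args) {
    // Load only this resource; resourceestimator-config.xml must be on the classpath.
    Configuration conf = new Configuration(false);
    conf.addResource("resourceestimator-config.xml");

    double alpha = conf.getDouble("resourceestimator.solver.lp.alpha", 0.1);
    double beta = conf.getDouble("resourceestimator.solver.lp.beta", 0.1);
    int minRuns = conf.getInt("resourceestimator.solver.lp.minJobRuns", 2);
    long interval = conf.getLong("resourceestimator.timeInterval", 5L);
    int port = conf.getInt("resourceestimator.service-port", 9998);

    System.out.printf("alpha=%.2f beta=%.2f minRuns=%d interval=%ds port=%d%n",
        alpha, beta, minRuns, interval, port);
  }
}
```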

+ 2 - 0
hadoop-tools/hadoop-resourceestimator/src/main/data/resourceEstimatorService.txt

@@ -0,0 +1,2 @@
+tpch_q12 50 1462148517000 1462148527345 1462148952345 tpch_q12_0 8192:0| 1:0|1:5|1074:10|2538:15|2468:20|
+tpch_q12 50 1462148949000 1462148960455 1462149205455 tpch_q12_1 8192:0| 1:0|1:5|794:10|2517:15|2484:20|

+ 95 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/api/RecurrenceId.java

@@ -0,0 +1,95 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.api;
+
+/**
+ * RecurrenceId is the id for the recurring pipeline jobs.
+ * <p> We assume that the pipeline job can be uniquely identified with
+ * {pipelineId, runId}.
+ */
+public class RecurrenceId {
+  /**
+   * pipelineId is the unique id for the pipeline jobs.
+   */
+  private String pipelineId;
+  /**
+   * runId is the unique instance id for the pipeline job in one run, and it
+   * will change across runs.
+   */
+  private String runId;
+  // TODO: we may add more ids of the pipeline jobs to identify them.
+
+  /**
+   * Constructor.
+   *
+   * @param pipelineIdConfig the unique id for the pipeline jobs.
+   * @param runIdConfig the unique instance id for the pipeline job in one run.
+   */
+  public RecurrenceId(final String pipelineIdConfig, final String runIdConfig) {
+    this.pipelineId = pipelineIdConfig;
+    this.runId = runIdConfig;
+  }
+
+  /**
+   * Return the pipelineId for the pipeline jobs.
+   *
+   * @return the pipelineId.
+   */
+  public final String getPipelineId() {
+    return pipelineId;
+  }
+
+  public void setPipelineId(String pipelineId) {
+    this.pipelineId = pipelineId;
+  }
+
+  /**
+   * Return the runId for the pipeline job in one run.
+   *
+   * @return the runId.
+   */
+  public final String getRunId() {
+    return runId;
+  }
+
+  public void setRunId(String runId) {
+    this.runId = runId;
+  }
+
+  @Override public final String toString() {
+    return String.format("{pipelineId: %s, runId: %s}", pipelineId, runId);
+  }
+
+  @Override public final int hashCode() {
+    return getPipelineId().hashCode() ^ getRunId().hashCode();
+  }
+
+  @Override public final boolean equals(final Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if ((obj == null) || (getClass() != obj.getClass())) {
+      return false;
+    }
+    final RecurrenceId other = (RecurrenceId) obj;
+    return pipelineId.equals(other.pipelineId) && runId.equals(other.runId);
+  }
+}
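
For illustration, a small sketch of the identity semantics defined above: equality and hashCode are computed over the {pipelineId, runId} pair, so two instances describing the same run collapse to one key in a hash-based store. The ids reuse the sample pipeline names from the data file added earlier in this diff; everything else is made up.

```java
import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;

public class RecurrenceIdSketch {
  public static void main(String[] args) {
    RecurrenceId firstRun = new RecurrenceId("tpch_q12", "tpch_q12_0");
    RecurrenceId sameRun = new RecurrenceId("tpch_q12", "tpch_q12_0");
    RecurrenceId nextRun = new RecurrenceId("tpch_q12", "tpch_q12_1");

    System.out.println(firstRun.equals(sameRun));                  // true
    System.out.println(firstRun.hashCode() == sameRun.hashCode()); // true
    System.out.println(firstRun.equals(nextRun));                  // false
    System.out.println(firstRun); // {pipelineId: tpch_q12, runId: tpch_q12_0}
  }
}
```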

+ 211 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/api/ResourceSkyline.java

@@ -0,0 +1,211 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.api;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+
+/**
+ * ResourceSkyline records the job identification information as well as job's
+ * requested {@code
+ * container}s information during its lifespan.
+ */
+public class ResourceSkyline {
+  /**
+   * The auto-generated {@code ApplicationId} in job's one run.
+   * <p>
+   * <p>
+   * For a pipeline job, we assume the {@code jobId} changes each time we run
+   * the pipeline job.
+   */
+  private String jobId;
+  /**
+   * The input data size of the job.
+   */
+  private double jobInputDataSize;
+  /**
+   * Job submission time, stored as a long timestamp extracted from the logs
+   * (different logs may use different time formats).
+   */
+  private long jobSubmissionTime;
+  /**
+   * Job finish time, stored as a long timestamp extracted from the logs
+   * (different logs may use different time formats).
+   */
+  private long jobFinishTime;
+  /**
+   * The resource spec of containers allocated to the job.
+   */
+  private Resource containerSpec;
+  /**
+   * The list of {@link Resource} allocated to the job in its lifespan.
+   */
+  private RLESparseResourceAllocation skylineList;
+  // TODO
+  // We plan to record pipeline job's actual resource consumptions in one run
+  // here.
+  // TODO
+  // We might need to add more features to the ResourceSkyline, such as
+  // users, VC, etc.
+
+  /**
+   * Constructor.
+   *
+   * @param jobIdConfig             the id of the job.
+   * @param jobInputDataSizeConfig  the input data size of the job.
+   * @param jobSubmissionTimeConfig the submission time of the job.
+   * @param jobFinishTimeConfig     the finish time of the job.
+   * @param containerSpecConfig     the resource spec of containers allocated
+   *                                to the job.
+   * @param skylineListConfig       the list of {@link Resource} allocated in
+   *                                one run.
+   */
+  public ResourceSkyline(final String jobIdConfig,
+      final double jobInputDataSizeConfig, final long jobSubmissionTimeConfig,
+      final long jobFinishTimeConfig, final Resource containerSpecConfig,
+      final RLESparseResourceAllocation skylineListConfig) {
+    this.jobId = jobIdConfig;
+    this.jobInputDataSize = jobInputDataSizeConfig;
+    this.jobSubmissionTime = jobSubmissionTimeConfig;
+    this.jobFinishTime = jobFinishTimeConfig;
+    this.containerSpec = containerSpecConfig;
+    this.skylineList = skylineListConfig;
+  }
+
+  /**
+   * Empty constructor.
+   */
+  public ResourceSkyline() {
+  }
+
+  /**
+   * Get the id of the job.
+   *
+   * @return the id of this job.
+   */
+  public final String getJobId() {
+    return jobId;
+  }
+
+  /**
+   * Set jobId.
+   *
+   * @param jobIdConfig jobId.
+   */
+  public final void setJobId(final String jobIdConfig) {
+    this.jobId = jobIdConfig;
+  }
+
+  /**
+   * Get the job's input data size.
+   *
+   * @return job's input data size.
+   */
+  public final double getJobInputDataSize() {
+    return jobInputDataSize;
+  }
+
+  /**
+   * Set jobInputDataSize.
+   *
+   * @param jobInputDataSizeConfig jobInputDataSize.
+   */
+  public final void setJobInputDataSize(final double jobInputDataSizeConfig) {
+    this.jobInputDataSize = jobInputDataSizeConfig;
+  }
+
+  /**
+   * Get the job's submission time.
+   *
+   * @return job's submission time.
+   */
+  public final long getJobSubmissionTime() {
+    return jobSubmissionTime;
+  }
+
+  /**
+   * Set jobSubmissionTime.
+   *
+   * @param jobSubmissionTimeConfig jobSubmissionTime.
+   */
+  public final void setJobSubmissionTime(final long jobSubmissionTimeConfig) {
+    this.jobSubmissionTime = jobSubmissionTimeConfig;
+  }
+
+  /**
+   * Get the job's finish time.
+   *
+   * @return job's finish time.
+   */
+  public final long getJobFinishTime() {
+    return jobFinishTime;
+  }
+
+  /**
+   * Set jobFinishTime.
+   *
+   * @param jobFinishTimeConfig jobFinishTime.
+   */
+  public final void setJobFinishTime(final long jobFinishTimeConfig) {
+    this.jobFinishTime = jobFinishTimeConfig;
+  }
+
+  /**
+   * Get the resource spec of the job's allocated {@code container}s.
+   * <p> Key assumption: during job's lifespan, its allocated {@code container}s
+   * have the same {@link Resource} spec.
+   *
+   * @return the {@link Resource} spec of the job's allocated
+   * {@code container}s.
+   */
+  public final Resource getContainerSpec() {
+    return containerSpec;
+  }
+
+  /**
+   * Set containerSpec.
+   *
+   * @param containerSpecConfig containerSpec.
+   */
+  public final void setContainerSpec(final Resource containerSpecConfig) {
+    this.containerSpec = containerSpecConfig;
+  }
+
+  /**
+   * Get the list of {@link Resource}s allocated to the job.
+   *
+   * @return the {@link RLESparseResourceAllocation} which contains the list of
+   * {@link Resource}s allocated to the job.
+   */
+  public final RLESparseResourceAllocation getSkylineList() {
+    return skylineList;
+  }
+
+  /**
+   * Set skylineList.
+   *
+   * @param skylineListConfig skylineList.
+   */
+  public final void setSkylineList(
+      final RLESparseResourceAllocation skylineListConfig) {
+    this.skylineList = skylineListConfig;
+  }
+}
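
A hypothetical construction of a ResourceSkyline, for illustration only. The skyline is an RLESparseResourceAllocation built from a cumulative map of allocations over time (the same constructor the serializer later in this diff uses), and the timestamps loosely mirror the sample data file; all values are made up.

```java
import java.util.TreeMap;

import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;

public class ResourceSkylineSketch {
  public static void main(String[] args) {
    // Ten 1GB/1-core containers held from t=0 and released at t=25.
    TreeMap<Long, Resource> cumulative = new TreeMap<>();
    cumulative.put(0L, Resource.newInstance(10 * 1024, 10));
    cumulative.put(25L, Resource.newInstance(0, 0));
    RLESparseResourceAllocation skyline = new RLESparseResourceAllocation(
        cumulative, new DefaultResourceCalculator());

    ResourceSkyline run = new ResourceSkyline("tpch_q12_0", 50.0,
        1462148517000L, 1462148952345L, Resource.newInstance(1024, 1), skyline);

    System.out.println(run.getJobId() + " ran for "
        + (run.getJobFinishTime() - run.getJobSubmissionTime()) + " ms");
  }
}
```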

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/api/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The common APIs for the resource estimator.
+ */
+
+package org.apache.hadoop.resourceestimator.common.api;

+ 125 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/config/ResourceEstimatorConfiguration.java

@@ -0,0 +1,125 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.config;
+
+import org.apache.hadoop.resourceestimator.skylinestore.impl.InMemoryStore;
+import org.apache.hadoop.resourceestimator.solver.impl.LpSolver;
+import org.apache.hadoop.resourceestimator.translator.impl.BaseLogParser;
+import org.apache.hadoop.resourceestimator.translator.impl.NativeSingleLineParser;
+
+/**
+ * Defines configuration keys for ResourceEstimatorServer.
+ */
+public final class ResourceEstimatorConfiguration {
+
+  /**
+   * The location of the configuration file for ResourceEstimatorService.
+   */
+  public static final String CONFIG_FILE = "resourceestimator-config.xml";
+
+  /**
+   * The URI for ResourceEstimatorService.
+   */
+  public static final String SERVICE_URI = "http://0.0.0.0/";
+
+  /**
+   * The port which ResourceEstimatorService listens to.
+   */
+  public static final String SERVICE_PORT = "resourceestimator.service-port";
+
+  /**
+   * Default port number of ResourceEstimatorService.
+   */
+  public static final int DEFAULT_SERVICE_PORT = 9998;
+
+  /**
+   * The class name of the skylinestore provider.
+   */
+  public static final String SKYLINESTORE_PROVIDER =
+      "resourceestimator.skylinestore.provider";
+
+  /**
+   * Default value for skylinestore provider, which is an in-memory implementation of skylinestore.
+   */
+  public static final String DEFAULT_SKYLINESTORE_PROVIDER =
+      InMemoryStore.class.getName();
+
+  /**
+   * The class name of the translator provider.
+   */
+  public static final String TRANSLATOR_PROVIDER =
+      "resourceestimator.translator.provider";
+
+  /**
+   * Default value for translator provider, which extracts resourceskylines from log streams.
+   */
+  public static final String DEFAULT_TRANSLATOR_PROVIDER =
+      BaseLogParser.class.getName();
+
+  /**
+   * The class name of the translator single-line parser, which parses a single line in the log.
+   */
+  public static final String TRANSLATOR_LINE_PARSER =
+      "resourceestimator.translator.line-parser";
+
+  /**
+   * Default value for translator single-line parser, which can parse one line in the sample log.
+   */
+  public static final String DEFAULT_TRANSLATOR_LINE_PARSER =
+      NativeSingleLineParser.class.getName();
+
+  /**
+   * The class name of the solver provider.
+   */
+  public static final String SOLVER_PROVIDER =
+      "resourceestimator.solver.provider";
+
+  /**
+   * Default value for solver provider, which incorporates a Linear Programming model to make the prediction.
+   */
+  public static final String DEFAULT_SOLVER_PROVIDER = LpSolver.class.getName();
+
+  /**
+   * The time length which is used to discretize job execution into intervals.
+   */
+  public static final String TIME_INTERVAL_KEY =
+      "resourceestimator.timeInterval";
+
+  /**
+   * The parameter which tunes the tradeoff between resource over-allocation and under-allocation in the Linear Programming model.
+   */
+  public static final String SOLVER_ALPHA_KEY =
+      "resourceestimator.solver.lp.alpha";
+
+  /**
+   * The parameter which controls the generalization of the Linear Programming model.
+   */
+  public static final String SOLVER_BETA_KEY =
+      "resourceestimator.solver.lp.beta";
+
+  /**
+   * The minimum number of job runs required in order to make the prediction.
+   */
+  public static final String SOLVER_MIN_JOB_RUN_KEY =
+      "resourceestimator.solver.lp.minJobRuns";
+
+  private ResourceEstimatorConfiguration() {}
+}
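
These keys come in key/default pairs. A minimal sketch of resolving the provider class names against the XML file shown earlier, falling back to the DEFAULT_* constants when the optional properties are left commented out (illustration only; the real wiring lives in server code outside this diff):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorConfiguration;

public class ProviderKeysSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.addResource(ResourceEstimatorConfiguration.CONFIG_FILE);

    String skylineStore = conf.get(
        ResourceEstimatorConfiguration.SKYLINESTORE_PROVIDER,
        ResourceEstimatorConfiguration.DEFAULT_SKYLINESTORE_PROVIDER);
    String lineParser = conf.get(
        ResourceEstimatorConfiguration.TRANSLATOR_LINE_PARSER,
        ResourceEstimatorConfiguration.DEFAULT_TRANSLATOR_LINE_PARSER);
    int port = conf.getInt(
        ResourceEstimatorConfiguration.SERVICE_PORT,
        ResourceEstimatorConfiguration.DEFAULT_SERVICE_PORT);

    System.out.println(skylineStore + " / " + lineParser + " / port " + port);
  }
}
```

The class-name strings resolved this way feed the reflective factory in ResourceEstimatorUtil, shown next.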

+ 81 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/config/ResourceEstimatorUtil.java

@@ -0,0 +1,81 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.config;
+
+import java.lang.reflect.Constructor;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.resourceestimator.common.exception.ResourceEstimatorException;
+
+/**
+ * General resourceestimator utils.
+ */
+public final class ResourceEstimatorUtil {
+
+  private static final Class<?>[] EMPTY_ARRAY = new Class[0];
+
+  private ResourceEstimatorUtil() {}
+
+  /**
+   * Helper method to create instances of Object using the class name specified
+   * in the configuration object.
+   *
+   * @param conf                the yarn configuration
+   * @param configuredClassName the configuration provider key
+   * @param defaultValue        the default implementation class
+   * @param type                the required interface/base class
+   * @param <T>                 The type of the instance to create
+   * @return the instances created
+   * @throws ResourceEstimatorException if the provider initialization fails.
+   */
+  @SuppressWarnings("unchecked") public static <T> T createProviderInstance(
+      Configuration conf, String configuredClassName, String defaultValue,
+      Class<T> type) throws ResourceEstimatorException {
+    String className = conf.get(configuredClassName);
+    if (className == null) {
+      className = defaultValue;
+    }
+    try {
+      Class<?> concreteClass = Class.forName(className);
+      if (type.isAssignableFrom(concreteClass)) {
+        Constructor<T> meth =
+            (Constructor<T>) concreteClass.getDeclaredConstructor(EMPTY_ARRAY);
+        meth.setAccessible(true);
+        return meth.newInstance();
+      } else {
+        StringBuilder errMsg = new StringBuilder();
+        errMsg.append("Class: ").append(className).append(" not instance of ")
+            .append(type.getCanonicalName());
+        throw new ResourceEstimatorException(errMsg.toString());
+      }
+    } catch (ClassNotFoundException e) {
+      StringBuilder errMsg = new StringBuilder();
+      errMsg.append("Could not instantiate : ").append(className)
+          .append(" due to exception: ").append(e.getCause());
+      throw new ResourceEstimatorException(errMsg.toString());
+    } catch (ReflectiveOperationException e) {
+      StringBuilder errMsg = new StringBuilder();
+      errMsg.append("Could not instantiate : ").append(className)
+          .append(" due to exception: ").append(e.getCause());
+      throw new ResourceEstimatorException(errMsg.toString());
+    }
+  }
+}
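
A hypothetical use of createProviderInstance with the keys from ResourceEstimatorConfiguration, for illustration only. It assumes a SkylineStore interface in the skylinestore API package that InMemoryStore implements; that interface is not part of the hunks shown here, so its exact package and name are assumptions.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorConfiguration;
import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorUtil;
import org.apache.hadoop.resourceestimator.common.exception.ResourceEstimatorException;
// Assumed interface implemented by InMemoryStore; not shown in this diff.
import org.apache.hadoop.resourceestimator.skylinestore.api.SkylineStore;

public class ProviderInstanceSketch {
  public static void main(String[] args) throws ResourceEstimatorException {
    Configuration conf = new Configuration(false);
    conf.addResource(ResourceEstimatorConfiguration.CONFIG_FILE);

    // Reflectively instantiate the configured skyline store, falling back to
    // the in-memory default when the optional property is not set.
    SkylineStore store = ResourceEstimatorUtil.createProviderInstance(conf,
        ResourceEstimatorConfiguration.SKYLINESTORE_PROVIDER,
        ResourceEstimatorConfiguration.DEFAULT_SKYLINESTORE_PROVIDER,
        SkylineStore.class);
    System.out.println("skyline store: " + store.getClass().getName());
  }
}
```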

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/config/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The common configuration for the resource estimator.
+ */
+
+package org.apache.hadoop.resourceestimator.common.config;

+ 35 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/exception/ResourceEstimatorException.java

@@ -0,0 +1,35 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.exception;
+
+/**
+ * Exception thrown by ResourceEstimatorServer utility classes.
+ */
+public class ResourceEstimatorException extends Exception {
+
+  public ResourceEstimatorException(String message) {
+    super(message);
+  }
+
+  public ResourceEstimatorException(String message, Exception ex) {
+    super(message, ex);
+  }
+}

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/exception/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The common exception for the resource estimator.
+ */
+
+package org.apache.hadoop.resourceestimator.common.exception;

+ 77 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/serialization/RLESparseResourceAllocationSerDe.java

@@ -0,0 +1,77 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.serialization;
+
+import java.lang.reflect.Type;
+import java.util.NavigableMap;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+import com.google.gson.reflect.TypeToken;
+
+/**
+ * Serialize/deserialize RLESparseResourceAllocation object to/from JSON.
+ */
+public class RLESparseResourceAllocationSerDe
+    implements JsonSerializer<RLESparseResourceAllocation>,
+    JsonDeserializer<RLESparseResourceAllocation> {
+  private static final String KEY = "resourceAllocation";
+  private final Gson gson =
+      new GsonBuilder().registerTypeAdapter(Resource.class, new ResourceSerDe())
+          .create();
+  private final Type type = new TypeToken<NavigableMap<Long, Resource>>() {
+  }.getType();
+  private final ResourceCalculator resourceCalculator =
+      new DefaultResourceCalculator();
+
+  @Override public final JsonElement serialize(
+      final RLESparseResourceAllocation resourceAllocation,
+      final Type typeOfSrc, final JsonSerializationContext context) {
+    NavigableMap<Long, Resource> myMap = resourceAllocation.getCumulative();
+    JsonObject jo = new JsonObject();
+    JsonElement element = gson.toJsonTree(myMap, type);
+    jo.add(KEY, element);
+
+    return jo;
+  }
+
+  @Override public final RLESparseResourceAllocation deserialize(
+      final JsonElement json, final Type typeOfT,
+      final JsonDeserializationContext context) throws JsonParseException {
+    NavigableMap<Long, Resource> resAllocation =
+        gson.fromJson(json.getAsJsonObject().get(KEY), type);
+    RLESparseResourceAllocation rleSparseResourceAllocation =
+        new RLESparseResourceAllocation(resAllocation, resourceCalculator);
+    return rleSparseResourceAllocation;
+  }
+}

+ 61 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/serialization/ResourceSerDe.java

@@ -0,0 +1,61 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.common.serialization;
+
+import java.lang.reflect.Type;
+
+import org.apache.hadoop.yarn.api.records.Resource;
+
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+import com.google.gson.JsonSerializationContext;
+import com.google.gson.JsonSerializer;
+
+/**
+ * Serialize/deserialize Resource object to/from JSON.
+ */
+public class ResourceSerDe
+    implements JsonSerializer<Resource>, JsonDeserializer<Resource> {
+  private static final String KEY1 = "memory";
+  private static final String KEY2 = "vcores";
+
+  @Override public final JsonElement serialize(final Resource resource,
+      final Type type, final JsonSerializationContext context) {
+    JsonObject jo = new JsonObject();
+    jo.addProperty(KEY1, resource.getMemorySize());
+    jo.addProperty(KEY2, resource.getVirtualCores());
+    return jo;
+  }
+
+  @Override public final Resource deserialize(final JsonElement json,
+      final Type type, final JsonDeserializationContext context)
+      throws JsonParseException {
+    JsonObject jo = json.getAsJsonObject();
+    long mem = jo.getAsJsonPrimitive(KEY1).getAsLong();
+    int vcore = jo.getAsJsonPrimitive(KEY2).getAsInt();
+    Resource resource = Resource.newInstance(mem, vcore);
+
+    return resource;
+  }
+}
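
As an illustration, a minimal sketch that wires both adapters into a Gson instance (the same registration ResourceEstimatorService performs below) and round-trips a Resource; the sample memory and vcore values are arbitrary.

    import com.google.gson.Gson;
    import com.google.gson.GsonBuilder;
    import org.apache.hadoop.resourceestimator.common.serialization.RLESparseResourceAllocationSerDe;
    import org.apache.hadoop.resourceestimator.common.serialization.ResourceSerDe;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;

    public final class SerDeDemo {
      public static void main(String[] args) {
        Gson gson = new GsonBuilder()
            .registerTypeAdapter(Resource.class, new ResourceSerDe())
            .registerTypeAdapter(RLESparseResourceAllocation.class,
                new RLESparseResourceAllocationSerDe())
            .create();
        // Round-trip a Resource; it is serialized as {"memory":4096,"vcores":2}.
        Resource original = Resource.newInstance(4096, 2); // arbitrary sample values
        String json = gson.toJson(original, Resource.class);
        Resource restored = gson.fromJson(json, Resource.class);
        System.out.println(json + " -> " + restored);
      }
    }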

+ 24 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/common/serialization/package-info.java

@@ -0,0 +1,24 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * SkylineStore serialization module.
+ */
+package org.apache.hadoop.resourceestimator.common.serialization;

+ 146 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/ResourceEstimatorServer.java

@@ -0,0 +1,146 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.service;
+
+import java.io.IOException;
+import java.net.URI;
+
+import javax.ws.rs.core.UriBuilder;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.http.HttpServer2;
+import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorConfiguration;
+import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.util.ShutdownHookManager;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
+import org.apache.hadoop.yarn.webapp.YarnJacksonJaxbJsonProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A simple embedded Hadoop HTTP server.
+ */
+public final class ResourceEstimatorServer extends CompositeService {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(ResourceEstimatorServer.class.getName());
+  private HttpServer2 webServer;
+  private static URI baseURI;
+
+  public ResourceEstimatorServer() {
+    super(ResourceEstimatorServer.class.getName());
+  }
+
+  private static URI getBaseURI(Configuration config) {
+    baseURI = UriBuilder.fromUri(ResourceEstimatorConfiguration.SERVICE_URI)
+        .port(getPort(config)).build();
+    return baseURI;
+  }
+
+  private static int getPort(Configuration config) {
+    return config.getInt(ResourceEstimatorConfiguration.SERVICE_PORT,
+        ResourceEstimatorConfiguration.DEFAULT_SERVICE_PORT);
+  }
+
+  @Override protected void serviceInit(Configuration conf) throws Exception {
+    super.serviceInit(conf);
+  }
+
+  @Override protected void serviceStart() throws Exception {
+    super.serviceStart();
+    startResourceEstimatorApp();
+  }
+
+  private void join() {
+    // keep the main thread that started the server up until it receives a stop
+    // signal
+    if (webServer != null) {
+      try {
+        webServer.join();
+      } catch (InterruptedException ignore) {
+        Thread.currentThread().interrupt(); // preserve the interrupt status
+      }
+    }
+  }
+
+  @Override protected void serviceStop() throws Exception {
+    if (webServer != null) {
+      webServer.stop();
+    }
+    super.serviceStop();
+  }
+
+  private void startResourceEstimatorApp() throws IOException {
+    Configuration config = new YarnConfiguration();
+    config.addResource(ResourceEstimatorConfiguration.CONFIG_FILE);
+    HttpServer2.Builder builder =
+        new HttpServer2.Builder().setName("ResourceEstimatorServer")
+            .setConf(config)
+            //.setFindPort(true)
+            .addEndpoint(getBaseURI(config));
+    webServer = builder.build();
+    webServer.addJerseyResourcePackage(
+        ResourceEstimatorService.class.getPackage().getName() + ";"
+            + GenericExceptionHandler.class.getPackage().getName() + ";"
+            + YarnJacksonJaxbJsonProvider.class.getPackage().getName(), "/*");
+    webServer.start();
+  }
+
+  /**
+   * Start embedded Hadoop HTTP server.
+   *
+   * @return an instance of the started HTTP server.
+   * @throws IOException in case there is an error while starting the server.
+   */
+  static ResourceEstimatorServer startResourceEstimatorServer()
+      throws IOException, InterruptedException {
+    Configuration config = new YarnConfiguration();
+    config.addResource(ResourceEstimatorConfiguration.CONFIG_FILE);
+    ResourceEstimatorServer resourceEstimatorServer = null;
+    try {
+      resourceEstimatorServer = new ResourceEstimatorServer();
+      ShutdownHookManager.get().addShutdownHook(
+          new CompositeServiceShutdownHook(resourceEstimatorServer), 30);
+      resourceEstimatorServer.init(config);
+      resourceEstimatorServer.start();
+    } catch (Throwable t) {
+      LOGGER.error("Error starting ResourceEstimatorServer", t);
+    }
+
+    return resourceEstimatorServer;
+  }
+
+  public static void main(String[] args)
+      throws InterruptedException, IOException {
+    ResourceEstimatorServer server = startResourceEstimatorServer();
+    server.join();
+  }
+
+  /**
+   * Stop embedded Hadoop HTTP server.
+   *
+   * @throws Exception in case the HTTP server fails to shut down.
+   */
+  public void shutdown() throws Exception {
+    LOGGER.info("Stopping resourceestimator service at: {}.",
+        baseURI.toString());
+    webServer.stop();
+  }
+}
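
If the server is embedded in another JVM rather than launched through main(), a minimal sketch using only the lifecycle calls shown above is given below; init() and start() come from CompositeService, and the listening port is still whatever the resource estimator config file specifies.

    import org.apache.hadoop.resourceestimator.service.ResourceEstimatorServer;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public final class EmbeddedEstimatorDemo {
      public static void main(String[] args) throws Exception {
        ResourceEstimatorServer server = new ResourceEstimatorServer();
        server.init(new YarnConfiguration()); // serviceInit()
        server.start();                       // serviceStart() builds and starts the HttpServer2
        // ... the REST endpoints are now reachable ...
        server.shutdown();                    // stops the embedded web server
      }
    }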

+ 238 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/ResourceEstimatorService.java

@@ -0,0 +1,238 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.service;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Type;
+import java.util.List;
+import java.util.Map;
+
+import javax.ws.rs.DELETE;
+import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.core.MediaType;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorConfiguration;
+import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorUtil;
+import org.apache.hadoop.resourceestimator.common.exception.ResourceEstimatorException;
+import org.apache.hadoop.resourceestimator.common.serialization.RLESparseResourceAllocationSerDe;
+import org.apache.hadoop.resourceestimator.common.serialization.ResourceSerDe;
+import org.apache.hadoop.resourceestimator.skylinestore.api.SkylineStore;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+import org.apache.hadoop.resourceestimator.solver.api.Solver;
+import org.apache.hadoop.resourceestimator.solver.exceptions.SolverException;
+import org.apache.hadoop.resourceestimator.translator.api.LogParser;
+import org.apache.hadoop.resourceestimator.translator.impl.LogParserUtil;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.reflect.TypeToken;
+import com.google.inject.Singleton;
+
+/**
+ * Resource Estimator Service, which exposes a set of REST APIs for parsing job
+ * logs, querying the skyline store, and retrieving resource predictions.
+ */
+@Singleton @Path("/resourceestimator") public class ResourceEstimatorService {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(ResourceEstimatorService.class);
+  private static SkylineStore skylineStore;
+  private static Solver solver;
+  private static LogParser logParser;
+  private static LogParserUtil logParserUtil = new LogParserUtil();
+  private static Configuration config;
+  private static Gson gson;
+  private static Type rleType;
+  private static Type skylineStoreType;
+
+  public ResourceEstimatorService() throws ResourceEstimatorException {
+    if (skylineStore == null) {
+      try {
+        config = new Configuration();
+        config.addResource(ResourceEstimatorConfiguration.CONFIG_FILE);
+        skylineStore = ResourceEstimatorUtil.createProviderInstance(config,
+            ResourceEstimatorConfiguration.SKYLINESTORE_PROVIDER,
+            ResourceEstimatorConfiguration.DEFAULT_SKYLINESTORE_PROVIDER,
+            SkylineStore.class);
+        logParser = ResourceEstimatorUtil.createProviderInstance(config,
+            ResourceEstimatorConfiguration.TRANSLATOR_PROVIDER,
+            ResourceEstimatorConfiguration.DEFAULT_TRANSLATOR_PROVIDER,
+            LogParser.class);
+        logParser.init(config, skylineStore);
+        logParserUtil.setLogParser(logParser);
+        solver = ResourceEstimatorUtil.createProviderInstance(config,
+            ResourceEstimatorConfiguration.SOLVER_PROVIDER,
+            ResourceEstimatorConfiguration.DEFAULT_SOLVER_PROVIDER,
+            Solver.class);
+        solver.init(config, skylineStore);
+      } catch (Exception ex) {
+        LOGGER
+            .error("Server initialization failed due to: {}", ex.getMessage());
+        throw new ResourceEstimatorException(ex.getMessage(), ex);
+      }
+      gson = new GsonBuilder()
+          .registerTypeAdapter(Resource.class, new ResourceSerDe())
+          .registerTypeAdapter(RLESparseResourceAllocation.class,
+              new RLESparseResourceAllocationSerDe())
+          .enableComplexMapKeySerialization().create();
+      rleType = new TypeToken<RLESparseResourceAllocation>() {
+      }.getType();
+      skylineStoreType =
+          new TypeToken<Map<RecurrenceId, List<ResourceSkyline>>>() {
+          }.getType();
+    }
+  }
+
+  /**
+   * Parse the log file. See also {@link LogParser#parseStream(InputStream)}.
+   *
+   * @param logFile file/directory of the log to be parsed.
+   * @throws IOException                if it fails to parse the log.
+   * @throws SkylineStoreException      if it fails to addHistory to
+   *                                    {@link SkylineStore}.
+   * @throws ResourceEstimatorException if the {@link LogParser}
+   *     is not initialized.
+   */
+  @POST @Path("/translator/{logFile : .+}") public void parseFile(
+      @PathParam("logFile") String logFile)
+      throws IOException, SkylineStoreException, ResourceEstimatorException {
+    logParserUtil.parseLog(logFile);
+    LOGGER.debug("Parse logFile: {}.", logFile);
+  }
+
+  /**
+   * Get the predicted {@code Resource} allocation for the pipeline. If a
+   * prediction for the pipeline already exists in the {@link SkylineStore}, it
+   * is returned directly from the {@link SkylineStore}; otherwise the
+   * {@link Solver} is invoked to make a prediction, and the predicted
+   * {@code Resource} allocation is stored back to the {@link SkylineStore}.
+   * Note that invoking the {@link Solver} could be a time-consuming operation.
+   *
+   * @param pipelineId the id of the pipeline.
+   * @return Json format of {@link RLESparseResourceAllocation}.
+   * @throws SolverException       if the {@link Solver} fails;
+   * @throws SkylineStoreException if it fails to get the history
+   *     {@link ResourceSkyline}s or the predicted {@code Resource}
+   *     allocation from the {@link SkylineStore}.
+   */
+  @GET @Path("/estimator/{pipelineId}") @Produces(MediaType.APPLICATION_JSON)
+  public String getPrediction(
+      @PathParam(value = "pipelineId") String pipelineId)
+      throws SolverException, SkylineStoreException {
+    // first, try to grab the predicted resource allocation from the skyline
+    // store
+    RLESparseResourceAllocation result = skylineStore.getEstimation(pipelineId);
+    // if received resource allocation is null, then run the solver
+    if (result == null) {
+      RecurrenceId recurrenceId = new RecurrenceId(pipelineId, "*");
+      Map<RecurrenceId, List<ResourceSkyline>> jobHistory =
+          skylineStore.getHistory(recurrenceId);
+      result = solver.solve(jobHistory);
+    }
+    final String prediction = gson.toJson(result, rleType);
+    LOGGER.debug("Predict resource requests for pipelineId: {}." + pipelineId);
+
+    return prediction;
+  }
+
+  /**
+   * Get history {@link ResourceSkyline} from {@link SkylineStore}. This
+   * function supports the following special wildcard operations regarding
+   * {@link RecurrenceId}: If the {@code pipelineId} is "*", it will return all
+   * entries in the store; else, if the {@code runId} is "*", it will return all
+   * {@link ResourceSkyline}s belonging to the {@code pipelineId}; else, it will
+   * return all {@link ResourceSkyline}s belonging to the {{@code pipelineId},
+   * {@code runId}}. If the {@link RecurrenceId} does not exist, it will return
+   * <em>null</em>.
+   *
+   * @param pipelineId pipelineId of the history run.
+   * @param runId      runId of the history run.
+   * @return Json format of history {@link ResourceSkyline}s.
+   * @throws SkylineStoreException if it fails to getHistory
+   *     {@link ResourceSkyline} from {@link SkylineStore}.
+   */
+  @GET @Path("/skylinestore/history/{pipelineId}/{runId}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public String getHistoryResourceSkyline(
+      @PathParam("pipelineId") String pipelineId,
+      @PathParam("runId") String runId) throws SkylineStoreException {
+    RecurrenceId recurrenceId = new RecurrenceId(pipelineId, runId);
+    Map<RecurrenceId, List<ResourceSkyline>> jobHistory =
+        skylineStore.getHistory(recurrenceId);
+    final String skyline = gson.toJson(jobHistory, skylineStoreType);
+    LOGGER
+        .debug("Query the skyline store for recurrenceId: {}.", recurrenceId);
+
+    return skyline;
+  }
+
+  /**
+   * Get the estimated {@code Resource} allocation for the pipeline.
+   *
+   * @param pipelineId id of the pipeline.
+   * @return Json format of {@link RLESparseResourceAllocation}.
+   * @throws SkylineStoreException if it fails to get the estimated
+   *                               {@code Resource} allocation from
+   *                               {@link SkylineStore}.
+   */
+  @GET @Path("/skylinestore/estimation/{pipelineId}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public String getEstimatedResourceAllocation(
+      @PathParam("pipelineId") String pipelineId) throws SkylineStoreException {
+    RLESparseResourceAllocation result = skylineStore.getEstimation(pipelineId);
+    final String skyline = gson.toJson(result, rleType);
+    LOGGER.debug("Query the skyline store for pipelineId: {}." + pipelineId);
+
+    return skyline;
+  }
+
+  /**
+   * Delete history {@link ResourceSkyline}s from {@link SkylineStore}.
+   * <p> Note that for safety considerations, we only allow users to delete
+   * history {@link ResourceSkyline}s of one job run.
+   *
+   * @param pipelineId pipelineId of the history run.
+   * @param runId      runId runId of the history run.
+   * @throws SkylineStoreException if it fails to deleteHistory
+   *                               {@link ResourceSkyline}s.
+   */
+  @DELETE @Path("/skylinestore/history/{pipelineId}/{runId}")
+  public void deleteHistoryResourceSkyline(
+      @PathParam("pipelineId") String pipelineId,
+      @PathParam("runId") String runId) throws SkylineStoreException {
+    RecurrenceId recurrenceId = new RecurrenceId(pipelineId, runId);
+    skylineStore.deleteHistory(recurrenceId);
+    LOGGER.info("Delete ResourceSkyline for recurrenceId: {}.", recurrenceId);
+  }
+}
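
For anyone trying these endpoints, a sketch of a plain-Java client follows; the host and port (localhost:9998) and the log-file/pipeline names are illustrative assumptions, while the URL paths follow the @Path annotations above.

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;

    public final class EstimatorRestDemo {
      // Issues one HTTP request and returns the response body as a string.
      private static String call(String method, String path) throws Exception {
        URL url = new URL("http://localhost:9998/resourceestimator" + path);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod(method);
        try (BufferedReader in = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
          StringBuilder body = new StringBuilder();
          String line;
          while ((line = in.readLine()) != null) {
            body.append(line);
          }
          return body.toString();
        }
      }

      public static void main(String[] args) throws Exception {
        call("POST", "/translator/sample-job.log");              // parse a log file (hypothetical name)
        System.out.println(call("GET", "/estimator/tpch_q12"));  // prediction for a hypothetical pipeline
      }
    }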

+ 45 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/ShutdownHook.java

@@ -0,0 +1,45 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.service;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Simple shutdown hook for {@link ResourceEstimatorServer}.
+ */
+public class ShutdownHook extends Thread {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(ShutdownHook.class);
+  private final ResourceEstimatorServer server;
+
+  ShutdownHook(ResourceEstimatorServer server) {
+    this.server = server;
+  }
+
+  public void run() {
+    try {
+      server.shutdown();
+    } catch (Exception e) {
+      LOGGER.error("HttpServer fails to shut down!");
+    }
+  }
+}

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/service/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Resource estimator service.
+ */
+
+package org.apache.hadoop.resourceestimator.service;

+ 99 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/HistorySkylineStore.java

@@ -0,0 +1,99 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.api;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+
+/**
+ * HistorySkylineStore stores a pipeline job's {@link ResourceSkyline}s across
+ * all runs. {@code Estimator} will query the {@link ResourceSkyline}s for pipeline
+ * jobs. {@code Parser} will parse various types of job logs, construct
+ * {@link ResourceSkyline}s out of the logs and store them in the SkylineStore.
+ */
+public interface HistorySkylineStore {
+  /**
+   * Add job's resource skyline to the <em>store</em> indexed by the job's
+   * {@link RecurrenceId}. {@link RecurrenceId} is used to identify recurring
+   * pipeline jobs, and we assume that {@code
+   * ResourceEstimatorServer} users will provide the correct
+   * {@link RecurrenceId}. <p> If {@link ResourceSkyline}s to be added contain
+   * <em>null</em> elements, the function will skip them.
+   *
+   * @param recurrenceId     the unique id of user's recurring pipeline jobs.
+   * @param resourceSkylines the list of {@link ResourceSkyline}s in one run.
+   * @throws SkylineStoreException if: (1) input parameters are invalid; (2)
+   *     {@link ResourceSkyline}s to be added contain some duplicate
+   *     {@link RecurrenceId}s which already exist in the
+   *     {@link HistorySkylineStore}.
+   */
+  void addHistory(RecurrenceId recurrenceId,
+      List<ResourceSkyline> resourceSkylines) throws SkylineStoreException;
+
+  /**
+   * Delete all {@link ResourceSkyline}s belonging to given
+   * {@link RecurrenceId}.
+   * <p> Note that for safety considerations, we only allow users to
+   * deleteHistory {@link ResourceSkyline}s of one job run.
+   *
+   * @param recurrenceId the unique id of user's recurring pipeline jobs.
+   * @throws SkylineStoreException if: (1) input parameters are invalid; (2)
+   *     recurrenceId does not exist in the {@link HistorySkylineStore}.
+   */
+  void deleteHistory(RecurrenceId recurrenceId) throws SkylineStoreException;
+
+  /**
+   * Update {@link RecurrenceId} with given {@link ResourceSkyline}s. This
+   * function will deleteHistory all the {@link ResourceSkyline}s belonging to
+   * the {@link RecurrenceId}, and re-insert the given {@link ResourceSkyline}s
+   * to the SkylineStore.
+   * <p> If {@link ResourceSkyline}s contain <em>null</em> elements,
+   * the function will skip them.
+   *
+   * @param recurrenceId     the unique id of the pipeline job.
+   * @param resourceSkylines the list of {@link ResourceSkyline}s in one run.
+   * @throws SkylineStoreException if: (1) input parameters are invalid; (2)
+   *     recurrenceId does not exist in the SkylineStore.
+   */
+  void updateHistory(RecurrenceId recurrenceId,
+      List<ResourceSkyline> resourceSkylines) throws SkylineStoreException;
+
+  /**
+   * Return all {@link ResourceSkyline}s belonging to {@link RecurrenceId}.
+   * <p> This function supports the following special wildcard operations
+   * regarding {@link RecurrenceId}: If the {@code pipelineId} is "*", it will
+   * return all entries in the store; else, if the {@code runId} is "*", it
+   * will return all {@link ResourceSkyline}s belonging to the
+   * {@code pipelineId}; else, it will return all {@link ResourceSkyline}s
+   * belonging to the {{@code pipelineId}, {@code runId}}. If the
+   * {@link RecurrenceId} does not exist, it will return <em>null</em>.
+   *
+   * @param recurrenceId the unique id of the pipeline job.
+   * @return all {@link ResourceSkyline}s belonging to the recurrenceId.
+   * @throws SkylineStoreException if recurrenceId is <em>null</em>.
+   */
+  Map<RecurrenceId, List<ResourceSkyline>> getHistory(RecurrenceId recurrenceId)
+      throws SkylineStoreException;
+}
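
As an illustration of the wildcard semantics described in getHistory(), a small sketch is given below; the pipeline and run ids are hypothetical, and the store is assumed to have been populated elsewhere.

    import java.util.List;
    import java.util.Map;
    import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
    import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
    import org.apache.hadoop.resourceestimator.skylinestore.api.HistorySkylineStore;
    import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;

    public final class HistoryQueryDemo {
      static void queryExamples(HistorySkylineStore store) throws SkylineStoreException {
        Map<RecurrenceId, List<ResourceSkyline>> all =
            store.getHistory(new RecurrenceId("*", "*"));           // every entry in the store
        Map<RecurrenceId, List<ResourceSkyline>> onePipeline =
            store.getHistory(new RecurrenceId("tpch_q12", "*"));    // all runs of one pipeline
        Map<RecurrenceId, List<ResourceSkyline>> oneRun =
            store.getHistory(new RecurrenceId("tpch_q12", "run1")); // one run, or null if absent
      }
    }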

+ 60 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/PredictionSkylineStore.java

@@ -0,0 +1,60 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.api;
+
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+
+/**
+ * PredictionSkylineStore stores the predicted
+ * {@code RLESparseResourceAllocation} of a job as computed by the
+ * {@code Estimator} based on the {@code ResourceSkyline}s of past executions in
+ * the {@code HistorySkylineStore}.
+ */
+public interface PredictionSkylineStore {
+
+  /**
+   * Add job's predicted {@code Resource} allocation to the <em>store</em>
+   * indexed by the {@code pipelineId}.
+   * <p> Note that right now we only keep the latest copy of predicted
+   * {@code Resource} allocation for the recurring pipeline.
+   *
+   * @param pipelineId       the id of the recurring pipeline.
+   * @param resourceOverTime the predicted {@code Resource} allocation for the
+   *                         pipeline.
+   * @throws SkylineStoreException if input parameters are invalid.
+   */
+  void addEstimation(String pipelineId,
+      RLESparseResourceAllocation resourceOverTime)
+      throws SkylineStoreException;
+
+  /**
+   * Return the predicted {@code Resource} allocation for the pipeline.
+   * <p> If the pipelineId does not exist, it will return <em>null</em>.
+   *
+   * @param pipelineId the unique id of the pipeline.
+   * @return the predicted {@code Resource} allocation for the pipeline.
+   * @throws SkylineStoreException if pipelineId is <em>null</em>.
+   */
+  RLESparseResourceAllocation getEstimation(String pipelineId)
+      throws SkylineStoreException;
+}
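
A sketch of the prediction path is given below; the RLESparseResourceAllocation is built the same way the deserializer earlier in this patch builds it, and the time points, resources, and pipeline id are arbitrary illustrations.

    import java.util.TreeMap;
    import org.apache.hadoop.resourceestimator.skylinestore.api.PredictionSkylineStore;
    import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
    import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;

    public final class PredictionStoreDemo {
      static void predictionExample(PredictionSkylineStore store) throws SkylineStoreException {
        TreeMap<Long, Resource> cumulative = new TreeMap<>();
        cumulative.put(0L, Resource.newInstance(1024, 1));   // arbitrary sample points
        cumulative.put(600L, Resource.newInstance(4096, 4));
        RLESparseResourceAllocation predicted =
            new RLESparseResourceAllocation(cumulative, new DefaultResourceCalculator());
        store.addEstimation("tpch_q12", predicted);          // hypothetical pipeline id
        RLESparseResourceAllocation fetched = store.getEstimation("tpch_q12"); // null if unknown
      }
    }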

+ 30 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/SkylineStore.java

@@ -0,0 +1,30 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.api;
+
+/**
+ * SkylineStore is a composable interface for storing the history
+ * {@code ResourceSkyline}s of past job runs and the predicted
+ * {@code RLESparseResourceAllocation} for future execution.
+ */
+public interface SkylineStore
+    extends HistorySkylineStore, PredictionSkylineStore {
+}

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/api/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * APIs for the {@code SkylineStore}.
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.api;

+ 33 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/DuplicateRecurrenceIdException.java

@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown when the {@code RecurrenceId} already exists in the
+ * {@code SkylineStore}.
+ */
+public class DuplicateRecurrenceIdException extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public DuplicateRecurrenceIdException(final String message) {
+    super(message);
+  }
+}

+ 33 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/EmptyResourceSkylineException.java

@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown if the {@code ResourceSkyline}s to be added to the
+ * {@code SkylineStore} are empty.
+ */
+public class EmptyResourceSkylineException extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public EmptyResourceSkylineException(final String message) {
+    super(message);
+  }
+}

+ 32 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullPipelineIdException.java

@@ -0,0 +1,32 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown when the pipelineId to be added is <em>null</em>.
+ */
+public class NullPipelineIdException extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public NullPipelineIdException(final String message) {
+    super(message);
+  }
+}

+ 33 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullRLESparseResourceAllocationException.java

@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown if the {@code RLESparseResourceAllocation} to be added is
+ * <em>null</em>.
+ */
+public class NullRLESparseResourceAllocationException
+    extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public NullRLESparseResourceAllocationException(final String message) {
+    super(message);
+  }
+}

+ 32 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullRecurrenceIdException.java

@@ -0,0 +1,32 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown when the {@code RecurrenceId} to be added is <em>null</em>.
+ */
+public class NullRecurrenceIdException extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public NullRecurrenceIdException(final String message) {
+    super(message);
+  }
+}

+ 32 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/NullResourceSkylineException.java

@@ -0,0 +1,32 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown if the {@code ResourceSkyline} to be added is <em>null</em>.
+ */
+public class NullResourceSkylineException extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public NullResourceSkylineException(final String message) {
+    super(message);
+  }
+}

+ 33 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/RecurrenceIdNotFoundException.java

@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown if the {@code RecurrenceId} is not found in the
+ * {@code SkylineStore}.
+ */
+public class RecurrenceIdNotFoundException extends SkylineStoreException {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public RecurrenceIdNotFoundException(final String message) {
+    super(message);
+  }
+}

+ 33 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/SkylineStoreException.java

@@ -0,0 +1,33 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;
+
+/**
+ * Exception thrown when the {@code SkylineStore} or the {@code Estimator}
+ * fails to addHistory or query a pipeline job's resource skylines.
+ */
+public abstract class SkylineStoreException extends Exception {
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public SkylineStoreException(final String message) {
+    super(message);
+  }
+}

+ 24 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/exceptions/package-info.java

@@ -0,0 +1,24 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * SkylineStore exception module.
+ */
+package org.apache.hadoop.resourceestimator.skylinestore.exceptions;

+ 256 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/impl/InMemoryStore.java

@@ -0,0 +1,256 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.impl;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.skylinestore.api.SkylineStore;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.DuplicateRecurrenceIdException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.EmptyResourceSkylineException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.RecurrenceIdNotFoundException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+import org.apache.hadoop.resourceestimator.skylinestore.validator.SkylineStoreValidator;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An in-memory implementation of {@link SkylineStore}.
+ */
+public class InMemoryStore implements SkylineStore {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(InMemoryStore.class);
+  private final ReentrantReadWriteLock readWriteLock =
+      new ReentrantReadWriteLock();
+  private final Lock readLock = readWriteLock.readLock();
+  private final Lock writeLock = readWriteLock.writeLock();
+  private final SkylineStoreValidator inputValidator =
+      new SkylineStoreValidator();
+  /**
+   * A pipeline job's history {@link ResourceSkyline}s. TODO: we may flatten it
+   * out for quick access.
+   */
+  private final Map<RecurrenceId, List<ResourceSkyline>> skylineStore =
+      new HashMap<>(); // recurrenceId -> resource skylines
+  // Recurring pipeline's predicted resource allocations.
+  private final Map<String, RLESparseResourceAllocation> estimationStore =
+      new HashMap<>(); // pipelineId -> predicted resource allocation
+
+  private List<ResourceSkyline> eliminateNull(
+      final List<ResourceSkyline> resourceSkylines) {
+    final List<ResourceSkyline> result = new ArrayList<>();
+    for (final ResourceSkyline resourceSkyline : resourceSkylines) {
+      if (resourceSkyline != null) {
+        result.add(resourceSkyline);
+      }
+    }
+    return result;
+  }
+
+  @Override public final void addHistory(final RecurrenceId recurrenceId,
+      final List<ResourceSkyline> resourceSkylines)
+      throws SkylineStoreException {
+    inputValidator.validate(recurrenceId, resourceSkylines);
+    writeLock.lock();
+    try {
+      // remove the null elements in the resourceSkylines
+      final List<ResourceSkyline> filteredInput =
+          eliminateNull(resourceSkylines);
+      if (filteredInput.size() > 0) {
+        if (skylineStore.containsKey(recurrenceId)) {
+          // if filteredInput has duplicate jobIds with existing skylines in the
+          // store,
+          // throw out an exception
+          final List<ResourceSkyline> jobHistory =
+              skylineStore.get(recurrenceId);
+          final List<String> oldJobIds = new ArrayList<>();
+          for (final ResourceSkyline resourceSkyline : jobHistory) {
+            oldJobIds.add(resourceSkyline.getJobId());
+          }
+          if (!oldJobIds.isEmpty()) {
+            for (ResourceSkyline elem : filteredInput) {
+              if (oldJobIds.contains(elem.getJobId())) {
+                StringBuilder errMsg = new StringBuilder();
+                errMsg.append(
+                    "Trying to addHistory duplicate resource skylines for "
+                        + recurrenceId
+                        + ". Use updateHistory function instead.");
+                LOGGER.error(errMsg.toString());
+                throw new DuplicateRecurrenceIdException(errMsg.toString());
+              }
+            }
+          }
+          skylineStore.get(recurrenceId).addAll(filteredInput);
+          LOGGER.info("Successfully addHistory new resource skylines for {}.",
+              recurrenceId);
+        } else {
+          skylineStore.put(recurrenceId, filteredInput);
+          LOGGER.info("Successfully addHistory new resource skylines for {}.",
+              recurrenceId);
+        }
+      }
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  @Override public void addEstimation(String pipelineId,
+      RLESparseResourceAllocation resourceSkyline)
+      throws SkylineStoreException {
+    inputValidator.validate(pipelineId, resourceSkyline);
+    writeLock.lock();
+    try {
+      estimationStore.put(pipelineId, resourceSkyline);
+      LOGGER.info("Successfully add estimated resource allocation for {}.",
+          pipelineId);
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  @Override public final void deleteHistory(final RecurrenceId recurrenceId)
+      throws SkylineStoreException {
+    inputValidator.validate(recurrenceId);
+    writeLock.lock();
+    try {
+      if (skylineStore.containsKey(recurrenceId)) {
+        skylineStore.remove(recurrenceId);
+        LOGGER.warn("Delete resource skylines for {}.", recurrenceId);
+      } else {
+        StringBuilder errMsg = new StringBuilder();
+        errMsg.append(
+            "Trying to deleteHistory non-existing recurring pipeline  "
+                + recurrenceId + "\'s resource skylines");
+        LOGGER.error(errMsg.toString());
+        throw new RecurrenceIdNotFoundException(errMsg.toString());
+      }
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  @Override public final void updateHistory(final RecurrenceId recurrenceId,
+      final List<ResourceSkyline> resourceSkylines)
+      throws SkylineStoreException {
+    inputValidator.validate(recurrenceId, resourceSkylines);
+    writeLock.lock();
+    try {
+      if (skylineStore.containsKey(recurrenceId)) {
+        // remove the null elements in the resourceSkylines
+        List<ResourceSkyline> filteredInput = eliminateNull(resourceSkylines);
+        if (filteredInput.size() > 0) {
+          skylineStore.put(recurrenceId, filteredInput);
+          LOGGER.info("Successfully updateHistory resource skylines for {}.",
+              recurrenceId);
+        } else {
+          StringBuilder errMsg = new StringBuilder();
+          errMsg.append("Trying to updateHistory " + recurrenceId
+              + " with empty resource skyline");
+          LOGGER.error(errMsg.toString());
+          throw new EmptyResourceSkylineException(errMsg.toString());
+        }
+      } else {
+        StringBuilder errMsg = new StringBuilder();
+        errMsg.append(
+            "Trying to updateHistory non-existing resource skylines for "
+                + recurrenceId);
+        LOGGER.error(errMsg.toString());
+        throw new RecurrenceIdNotFoundException(errMsg.toString());
+      }
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  @Override public final Map<RecurrenceId, List<ResourceSkyline>> getHistory(
+      final RecurrenceId recurrenceId) throws SkylineStoreException {
+    inputValidator.validate(recurrenceId);
+    readLock.lock();
+    try {
+      String pipelineId = recurrenceId.getPipelineId();
+      // User tries to getHistory all resource skylines in the skylineStore
+      if (pipelineId.equals("*")) {
+        LOGGER
+            .info("Successfully query resource skylines for {}.", recurrenceId);
+        return Collections.unmodifiableMap(skylineStore);
+      }
+      String runId = recurrenceId.getRunId();
+      Map<RecurrenceId, List<ResourceSkyline>> result =
+          new HashMap<RecurrenceId, List<ResourceSkyline>>();
+      // User tries to getHistory pipelineId's all resource skylines in the
+      // skylineStore
+      if (runId.equals("*")) {
+        // TODO: this for loop is expensive, so we may change the type of
+        // skylineStore to
+        // speed up this loop.
+        for (Map.Entry<RecurrenceId, List<ResourceSkyline>> entry : skylineStore
+            .entrySet()) {
+          RecurrenceId index = entry.getKey();
+          if (index.getPipelineId().equals(pipelineId)) {
+            result.put(index, entry.getValue());
+          }
+        }
+        if (result.size() > 0) {
+          LOGGER.info("Successfully query resource skylines for {}.",
+              recurrenceId);
+          return Collections.unmodifiableMap(result);
+        } else {
+          LOGGER.warn(
+              "Trying to getHistory non-existing resource skylines for {}.",
+              recurrenceId);
+          return null;
+        }
+      }
+      // The user tries to get {pipelineId, runId}'s resource skylines
+      if (skylineStore.containsKey(recurrenceId)) {
+        result.put(recurrenceId, skylineStore.get(recurrenceId));
+      } else {
+        LOGGER
+            .warn("Trying to getHistory non-existing resource skylines for {}.",
+                recurrenceId);
+        return null;
+      }
+      LOGGER.info("Successfully query resource skylines for {}.", recurrenceId);
+      return Collections.unmodifiableMap(result);
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  @Override public final RLESparseResourceAllocation getEstimation(
+      String pipelineId) throws SkylineStoreException {
+    inputValidator.validate(pipelineId);
+    readLock.lock();
+    try {
+      return estimationStore.get(pipelineId);
+    } finally {
+      readLock.unlock();
+    }
+  }
+}
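
The getHistory implementation above gives wildcard semantics to RecurrenceId: a pipelineId of "*" returns the whole store, a runId of "*" returns every recorded run of that pipeline, and a fully specified id returns a single entry (or null when the store has nothing for it). A minimal usage sketch of those three cases; the class name InMemoryStore and the RecurrenceId(pipelineId, runId) constructor are assumptions here, since neither declaration appears in this excerpt:

    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
    import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
    import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;

    public final class SkylineStoreQuerySketch {
      public static void main(String[] args) throws SkylineStoreException {
        InMemoryStore store = new InMemoryStore();   // assumed class name
        // ... addHistory(...) calls omitted ...

        // 1. Every pipeline, every run.
        Map<RecurrenceId, List<ResourceSkyline>> all =
            store.getHistory(new RecurrenceId("*", "*"));

        // 2. Every recorded run of one pipeline ("tpch_q12" is illustrative).
        Map<RecurrenceId, List<ResourceSkyline>> onePipeline =
            store.getHistory(new RecurrenceId("tpch_q12", "*"));

        // 3. One specific run; null if the store has no such entry.
        Map<RecurrenceId, List<ResourceSkyline>> oneRun =
            store.getHistory(new RecurrenceId("tpch_q12", "run_0"));
      }
    }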

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/impl/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Implementation for {@code SkylineStore}.
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.impl;

+ 118 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/validator/SkylineStoreValidator.java

@@ -0,0 +1,118 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.validator;
+
+import java.util.List;
+
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.skylinestore.api.SkylineStore;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.NullPipelineIdException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.NullRLESparseResourceAllocationException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.NullRecurrenceIdException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.NullResourceSkylineException;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * SkylineStoreValidator validates input parameters for {@link SkylineStore}.
+ */
+public class SkylineStoreValidator {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(SkylineStoreValidator.class);
+
+  /**
+   * Check if recurrenceId is <em>null</em>.
+   *
+   * @param recurrenceId the id of the recurring pipeline job.
+   * @throws SkylineStoreException if input parameters are invalid.
+   */
+  public final void validate(final RecurrenceId recurrenceId)
+      throws SkylineStoreException {
+    if (recurrenceId == null) {
+      StringBuilder sb = new StringBuilder();
+      sb.append("Recurrence id is null, please try again by specifying"
+          + " a valid Recurrence id.");
+      LOGGER.error(sb.toString());
+      throw new NullRecurrenceIdException(sb.toString());
+    }
+  }
+
+  /**
+   * Check if pipelineId is <em>null</em>.
+   *
+   * @param pipelineId the id of the recurring pipeline job.
+   * @throws SkylineStoreException if input parameters are invalid.
+   */
+  public final void validate(final String pipelineId)
+      throws SkylineStoreException {
+    if (pipelineId == null) {
+      StringBuilder sb = new StringBuilder();
+      sb.append("pipelineId is null, please try again by specifying"
+          + " a valid pipelineId.");
+      LOGGER.error(sb.toString());
+      throw new NullPipelineIdException(sb.toString());
+    }
+  }
+
+  /**
+   * Check if recurrenceId is <em>null</em> or resourceSkylines is
+   * <em>null</em>.
+   *
+   * @param recurrenceId     the id of the recurring pipeline job.
+   * @param resourceSkylines the list of {@link ResourceSkyline}s to be added.
+   * @throws SkylineStoreException if input parameters are invalid.
+   */
+  public final void validate(final RecurrenceId recurrenceId,
+      final List<ResourceSkyline> resourceSkylines)
+      throws SkylineStoreException {
+    validate(recurrenceId);
+    if (resourceSkylines == null) {
+      StringBuilder sb = new StringBuilder();
+      sb.append("ResourceSkylines for " + recurrenceId
+          + " is null, please try again by "
+          + "specifying valid ResourceSkylines.");
+      LOGGER.error(sb.toString());
+      throw new NullResourceSkylineException(sb.toString());
+    }
+  }
+
+  /**
+   * Check if pipelineId is <em>null</em> or resourceOverTime is <em>null</em>.
+   *
+   * @param pipelineId       the id of the recurring pipeline.
+   * @param resourceOverTime predicted {@code Resource} allocation to be added.
+   * @throws SkylineStoreException if input parameters are invalid.
+   */
+  public final void validate(final String pipelineId,
+      final RLESparseResourceAllocation resourceOverTime)
+      throws SkylineStoreException {
+    validate(pipelineId);
+    if (resourceOverTime == null) {
+      StringBuilder sb = new StringBuilder();
+      sb.append("Resource allocation for " + pipelineId + " is null.");
+      LOGGER.error(sb.toString());
+      throw new NullRLESparseResourceAllocationException(sb.toString());
+    }
+  }
+}
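
Each overload above fails fast with a dedicated subclass of SkylineStoreException for a null argument, so callers can tell which input was missing. A small sketch of catching one of them directly (the cast only disambiguates the overload):

    import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
    import org.apache.hadoop.resourceestimator.skylinestore.exceptions.NullRecurrenceIdException;
    import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
    import org.apache.hadoop.resourceestimator.skylinestore.validator.SkylineStoreValidator;

    public final class ValidatorSketch {
      public static void main(String[] args) {
        SkylineStoreValidator validator = new SkylineStoreValidator();
        try {
          validator.validate((RecurrenceId) null);   // throws NullRecurrenceIdException
        } catch (NullRecurrenceIdException e) {
          System.out.println("rejected recurrence id: " + e.getMessage());
        } catch (SkylineStoreException e) {
          System.out.println("other validation failure: " + e.getMessage());
        }
      }
    }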

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/skylinestore/validator/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Validator for {@code SkylineStore}.
+ */
+
+package org.apache.hadoop.resourceestimator.skylinestore.validator;

+ 76 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/api/Solver.java

@@ -0,0 +1,76 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.api;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.skylinestore.api.PredictionSkylineStore;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+import org.apache.hadoop.resourceestimator.solver.exceptions.SolverException;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+
+/**
+ * Solver takes a recurring pipeline's {@link ResourceSkyline} history as input,
+ * predicts its {@link Resource} requirements at each time t for the next run,
+ * and translates them into a {@link ResourceSkyline} which will be used to make
+ * recurring resource reservations.
+ */
+public interface Solver {
+  /**
+   * Initialize the Solver, including loading solver parameters from the
+   * configuration file.
+   *
+   * @param config       {@link Configuration} for the Solver.
+   * @param skylineStore the {@link PredictionSkylineStore} which stores
+   *                     predicted {@code Resource} allocations.
+   */
+  void init(Configuration config, PredictionSkylineStore skylineStore);
+
+  /**
+   * The Solver reads the recurring pipeline's {@link ResourceSkyline} history,
+   * and predicts its {@link ResourceSkyline} requirements for the next run.
+   *
+   * @param jobHistory the {@link ResourceSkyline}s of the recurring pipeline in
+   *     previous runs. The {@link RecurrenceId} identifies one run of the
+   *     recurring pipeline, and the list of {@link ResourceSkyline}s
+   *     records the {@link ResourceSkyline} of each job within the pipeline.
+   * @return the amount of {@link Resource} requested by the pipeline for the
+   * next run (discretized by timeInterval).
+   * @throws SolverException       if: (1) input is invalid; (2) the number of
+   *     instances in the jobHistory is smaller than the minimum
+   *     requirement; (3) solver runtime has unexpected behaviors;
+   * @throws SkylineStoreException if it fails to add predicted {@code Resource}
+   *     allocation to the {@link PredictionSkylineStore}.
+   */
+  RLESparseResourceAllocation solve(
+      Map<RecurrenceId, List<ResourceSkyline>> jobHistory)
+      throws SolverException, SkylineStoreException;
+
+  /**
+   * Release the resource used by the Solver.
+   */
+  void close();
+}
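
End to end, a caller initializes the solver with a Configuration and a prediction store, feeds it the history pulled from the history store, and gets back a discretized allocation (which solve also persists via addEstimation). A sketch under the assumptions that the LpSolver added later in this diff is used and that the in-memory store (called InMemoryStore here) implements both the history and the prediction sides of the skyline store API:

    import java.util.List;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
    import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
    import org.apache.hadoop.resourceestimator.solver.api.Solver;
    import org.apache.hadoop.resourceestimator.solver.impl.LpSolver;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;

    public final class SolverSketch {
      public static void main(String[] args) throws Exception {
        InMemoryStore store = new InMemoryStore();   // assumed class name
        Solver solver = new LpSolver();
        solver.init(new Configuration(), store);     // store acts as the PredictionSkylineStore
        try {
          Map<RecurrenceId, List<ResourceSkyline>> history =
              store.getHistory(new RecurrenceId("tpch_q12", "*"));
          RLESparseResourceAllocation prediction = solver.solve(history);
          System.out.println("predicted allocation: " + prediction);
        } finally {
          solver.close();
        }
      }
    }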

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/api/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * API for {@code Solver}.
+ */
+
+package org.apache.hadoop.resourceestimator.solver.api;

+ 34 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/InvalidInputException.java

@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.exceptions;
+
+/**
+ * Exception thrown when the {@code Solver} receives invalid input, e.g. a
+ * <em>null</em> or empty job history, or a non-positive time interval.
+ */
+public class InvalidInputException extends SolverException {
+
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public InvalidInputException(final String entity, final String reason) {
+    super(entity + " is " + reason + ", please try again with valid " + entity);
+  }
+}

+ 34 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/InvalidSolverException.java

@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.exceptions;
+
+/**
+ * Exception thrown when the {@code SkylineStore} or the {@code Estimator} tries
+ * to addHistory or query a pipeline job's resource skylines.
+ */
+public class InvalidSolverException extends SolverException {
+
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public InvalidSolverException(final String message) {
+    super(message);
+  }
+}

+ 34 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/SolverException.java

@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.exceptions;
+
+/**
+ * Base exception thrown when the {@code SkylineStore} or the {@code Estimator}
+ * fails to addHistory or query a pipeline job's resource skylines.
+ */
+public abstract class SolverException extends Exception {
+
+  private static final long serialVersionUID = -684069387367879218L;
+
+  public SolverException(final String message) {
+    super(message);
+  }
+}

+ 24 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/exceptions/package-info.java

@@ -0,0 +1,24 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * Exception module.
+ */
+package org.apache.hadoop.resourceestimator.solver.exceptions;

+ 94 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/impl/BaseSolver.java

@@ -0,0 +1,94 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.impl;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorConfiguration;
+import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
+import org.apache.hadoop.yarn.api.records.ReservationDefinition;
+import org.apache.hadoop.yarn.api.records.ReservationId;
+import org.apache.hadoop.yarn.api.records.ReservationRequest;
+import org.apache.hadoop.yarn.api.records.ReservationRequestInterpreter;
+import org.apache.hadoop.yarn.api.records.ReservationRequests;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+
+/**
+ * Common functions shared by {@code Solver} implementations, e.g. translating
+ * a predicted resource allocation into Hadoop's
+ * {@link ReservationSubmissionRequest}.
+ */
+public abstract class BaseSolver {
+  /**
+   * Used to generate {@link ReservationId}.
+   */
+  private static final Random RAND = new Random();
+
+  /**
+   * Translate the estimated {@link Resource} requirements of the pipeline to
+   * Hadoop's {@link ReservationSubmissionRequest}.
+   *
+   * @param containerSpec     the {@link Resource} to be allocated to each
+   *                          container;
+   * @param containerRequests the predicted {@link Resource} to be allocated to
+   *                          the job in each discrete time intervals;
+   * @param config            configuration file for BaseSolver.
+   * @return {@link ReservationSubmissionRequest} to be submitted to Hadoop to
+   * make recurring resource reservation for the pipeline.
+   */
+  public final ReservationSubmissionRequest toRecurringRDL(
+      final Resource containerSpec,
+      final RLESparseResourceAllocation containerRequests,
+      final Configuration config) {
+    final int timeInterval =
+        config.getInt(ResourceEstimatorConfiguration.TIME_INTERVAL_KEY, 5);
+    long pipelineSubmissionTime = containerRequests.getEarliestStartTime();
+    long pipelineFinishTime = containerRequests.getLatestNonNullTime();
+    final long containerMemAlloc = containerSpec.getMemorySize();
+    final long jobLen =
+        (pipelineFinishTime - pipelineSubmissionTime) / timeInterval;
+    List<ReservationRequest> reservationRequestList = new ArrayList<>();
+    for (int i = 0; i < jobLen; i++) {
+      // container spec, # of containers, concurrency, duration
+      ReservationRequest reservationRequest = ReservationRequest
+          .newInstance(containerSpec, (int) (
+              containerRequests.getCapacityAtTime(i * timeInterval)
+                  .getMemorySize() / containerMemAlloc), 1, timeInterval);
+      reservationRequestList.add(reservationRequest);
+    }
+    ReservationRequests reservationRequests = ReservationRequests
+        .newInstance(reservationRequestList,
+            ReservationRequestInterpreter.R_ALL);
+    ReservationDefinition reservationDefinition = ReservationDefinition
+        .newInstance(pipelineSubmissionTime, pipelineFinishTime,
+            reservationRequests, "LpSolver#toRecurringRDL");
+    ReservationId reservationId =
+        ReservationId.newInstance(RAND.nextLong(), RAND.nextLong());
+    ReservationSubmissionRequest reservationSubmissionRequest =
+        ReservationSubmissionRequest
+            .newInstance(reservationDefinition, "resourceestimator",
+                reservationId);
+    return reservationSubmissionRequest;
+  }
+}
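
The container count requested for interval k is the predicted memory at time k * timeInterval divided by one container's memory, so 5 GB predicted against 1 GB containers becomes a 5-container ReservationRequest for that interval. A sketch of the conversion, assuming the default timeInterval of 5 and illustrative allocation values (LpSolver, defined below, is used only as a concrete BaseSolver):

    import java.util.TreeMap;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.resourceestimator.solver.impl.BaseSolver;
    import org.apache.hadoop.resourceestimator.solver.impl.LpSolver;
    import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval;
    import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;

    public final class RdlSketch {
      public static void main(String[] args) {
        // Predicted allocation: 5 GB for the first 5-second interval, 2 GB for the second.
        RLESparseResourceAllocation predicted = new RLESparseResourceAllocation(
            new TreeMap<Long, Resource>(), new DefaultResourceCalculator());
        predicted.addInterval(new ReservationInterval(0, 5),
            Resource.newInstance(5 * 1024, 5));
        predicted.addInterval(new ReservationInterval(5, 10),
            Resource.newInstance(2 * 1024, 2));

        Resource containerSpec = Resource.newInstance(1024, 1);   // 1 GB, 1 vcore
        BaseSolver solver = new LpSolver();
        ReservationSubmissionRequest request =
            solver.toRecurringRDL(containerSpec, predicted, new Configuration());
        // With timeInterval = 5, interval 0 asks for 5 containers and interval 1 for 2.
        System.out.println(request.getReservationDefinition());
      }
    }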

+ 340 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/impl/LpSolver.java

@@ -0,0 +1,340 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.impl;
+
+import java.math.BigDecimal;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.common.config.ResourceEstimatorConfiguration;
+import org.apache.hadoop.resourceestimator.skylinestore.api.PredictionSkylineStore;
+import org.apache.hadoop.resourceestimator.skylinestore.exceptions.SkylineStoreException;
+import org.apache.hadoop.resourceestimator.solver.api.Solver;
+import org.apache.hadoop.resourceestimator.solver.exceptions.SolverException;
+import org.apache.hadoop.resourceestimator.solver.preprocess.SolverPreprocessor;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval;
+import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.ojalgo.optimisation.Expression;
+import org.ojalgo.optimisation.ExpressionsBasedModel;
+import org.ojalgo.optimisation.Optimisation.Result;
+import org.ojalgo.optimisation.Variable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An LP (Linear Programming) solution that predicts a recurring pipeline's
+ * {@link Resource} requirements, and generates Hadoop {@code RDL} requests
+ * which will be used to make recurring resource reservations.
+ */
+public class LpSolver extends BaseSolver implements Solver {
+  private static final Logger LOGGER = LoggerFactory.getLogger(LpSolver.class);
+  private final SolverPreprocessor preprocessor = new SolverPreprocessor();
+  /**
+   * Controls the balance between over-allocation and under-allocation.
+   */
+  private double alpha;
+  /**
+   * Controls the generalization of the solver.
+   */
+  private double beta;
+  /**
+   * The minimum number of job runs required to run the solver.
+   */
+  private int minJobRuns;
+  /**
+   * The time interval which is used to discretize job execution.
+   */
+  private int timeInterval;
+  /**
+   * The PredictionSkylineStore to store the predicted ResourceSkyline for new
+   * run.
+   */
+  private PredictionSkylineStore predictionSkylineStore;
+
+  @Override public final void init(final Configuration config,
+      PredictionSkylineStore skylineStore) {
+    this.alpha =
+        config.getDouble(ResourceEstimatorConfiguration.SOLVER_ALPHA_KEY, 0.1);
+    this.beta =
+        config.getDouble(ResourceEstimatorConfiguration.SOLVER_BETA_KEY, 0.1);
+    this.minJobRuns =
+        config.getInt(ResourceEstimatorConfiguration.SOLVER_MIN_JOB_RUN_KEY, 1);
+    this.timeInterval =
+        config.getInt(ResourceEstimatorConfiguration.TIME_INTERVAL_KEY, 5);
+    this.predictionSkylineStore = skylineStore;
+  }
+
+  /**
+   * Generate over-allocation constraints.
+   *
+   * @param lpModel            the LP model.
+   * @param cJobITimeK         actual container allocation for job i in time
+   *                           interval k.
+   * @param oa                 container over-allocation.
+   * @param x                  predicted container allocation.
+   * @param indexJobITimeK     index for job i at time interval k.
+   * @param timeK              index for time interval k.
+   */
+  private void generateOverAllocationConstraints(
+      final ExpressionsBasedModel lpModel, final double cJobITimeK,
+      final Variable[] oa, final Variable[] x, final int indexJobITimeK,
+      final int timeK) {
+    // oa_job_i_timeK >= x_timeK - cJobITimeK
+    Expression overAllocExpression =
+        lpModel.addExpression("over_alloc_" + indexJobITimeK);
+    overAllocExpression.set(oa[indexJobITimeK], 1);
+    overAllocExpression.set(x[timeK], -1);
+    overAllocExpression.lower(-cJobITimeK); // >=
+  }
+
+  /**
+   * Generate under-allocation constraints.
+   *
+   * @param lpModel            the LP model.
+   * @param cJobITimeK         actual container allocation for job i in time
+   *                           interval k.
+   * @param uaPredict          absolute container under-allocation.
+   * @param ua                 recursive container under-allocation.
+   * @param x                  predicted container allocation.
+   * @param indexJobITimeK     index for job i at time interval k.
+   * @param timeK              index for time interval k.
+   */
+  private void generateUnderAllocationConstraints(
+      final ExpressionsBasedModel lpModel, final double cJobITimeK,
+      final Variable[] uaPredict, final Variable[] ua, final Variable[] x,
+      final int indexJobITimeK, final int timeK) {
+    // uaPredict_job_i_timeK + x_timeK >= cJobITimeK
+    Expression underAllocPredictExpression =
+        lpModel.addExpression("under_alloc_predict_" + indexJobITimeK);
+    underAllocPredictExpression.set(uaPredict[indexJobITimeK], 1);
+    underAllocPredictExpression.set(x[timeK], 1);
+    underAllocPredictExpression.lower(cJobITimeK); // >=
+    if (timeK >= 1) {
+      /** Recursively calculate container under-allocation. */
+      // ua_job_i_timeK >= ua_job_i_time_(k-1) + cJobITimeK - x_timeK
+      Expression underAllocExpression =
+          lpModel.addExpression("under_alloc_" + indexJobITimeK);
+      underAllocExpression.set(ua[indexJobITimeK], 1);
+      underAllocExpression.set(ua[indexJobITimeK - 1], -1);
+      underAllocExpression.set(x[timeK], 1);
+      underAllocExpression.lower(cJobITimeK); // >=
+    } else {
+      /** Initial value for container under-allocation. */
+      // ua_job_i_time_0 >= cJobI_time_0 - x_time_0
+      Expression underAllocExpression =
+          lpModel.addExpression("under_alloc_" + indexJobITimeK);
+      underAllocExpression.set(ua[indexJobITimeK], 1);
+      underAllocExpression.set(x[timeK], 1);
+      underAllocExpression.lower(cJobITimeK); // >=
+    }
+  }
+
+  /**
+   * Generate solver objective.
+   *
+   * @param objective LP solver objective.
+   * @param numJobs   number of history runs of the recurring pipeline.
+   * @param jobLen    (maximum) job length of the recurring pipeline.
+   * @param oa        container over-allocation.
+   * @param ua        recursive container under-allocation.
+   * @param eps       regularization parameter.
+   */
+  private void generateObjective(final Expression objective, final int numJobs,
+      final int jobLen, final Variable[] oa, final Variable[] ua,
+      final Variable eps) {
+    int indexJobITimeK;
+    // sum Over_Allocation
+    for (int indexJobI = 0; indexJobI < numJobs; indexJobI++) {
+      for (int timeK = 0; timeK < jobLen; timeK++) {
+        indexJobITimeK = indexJobI * jobLen + timeK;
+        objective.set(oa[indexJobITimeK], alpha / numJobs);
+      }
+    }
+    // sum Under_Allocation
+    int indexJobITimeN;
+    for (int indexJobI = 0; indexJobI < numJobs; indexJobI++) {
+      indexJobITimeN = indexJobI * jobLen + jobLen - 1;
+      objective.set(ua[indexJobITimeN], (1 - alpha) / numJobs);
+    }
+    objective.set(eps, beta);
+    objective.weight(BigDecimal.valueOf(1));
+  }
+
+  /**
+   * Get the job length of the recurring pipeline.
+   *
+   * @param resourceSkylines the history ResourceSkylines allocated to the
+   *                         recurring pipeline.
+   * @param numJobs          number of history runs of the recurring pipeline.
+   * @return the length of the recurring pipeline, in discretized time
+   * intervals.
+   */
+  private int getJobLen(final List<ResourceSkyline> resourceSkylines,
+      final int numJobs) {
+    int curLen = 0;
+    int jobLen = 0;
+    for (int indexJobI = 0; indexJobI < numJobs; indexJobI++) {
+      curLen = (int) (resourceSkylines.get(indexJobI).getSkylineList()
+          .getLatestNonNullTime() - resourceSkylines.get(indexJobI)
+          .getSkylineList().getEarliestStartTime() + timeInterval - 1)
+          / timeInterval; // for round up
+      if (jobLen < curLen) {
+        jobLen = curLen;
+      }
+    }
+    return jobLen;
+  }
+
+  @Override public final RLESparseResourceAllocation solve(
+      final Map<RecurrenceId, List<ResourceSkyline>> jobHistory)
+      throws SolverException, SkylineStoreException {
+    // TODO: add timeout support for this function; ideally we should also
+    // return the confidence level associated with the predicted resource.
+    preprocessor.validate(jobHistory, timeInterval);
+    final List<ResourceSkyline> resourceSkylines =
+        preprocessor.aggregateSkylines(jobHistory, minJobRuns);
+    final int numJobs = resourceSkylines.size();
+    final int jobLen = getJobLen(resourceSkylines, numJobs);
+
+    /** Create variables. */
+    final ExpressionsBasedModel lpModel = new ExpressionsBasedModel();
+
+    Variable[] oa = new Variable[jobLen * numJobs];
+    Variable[] ua = new Variable[jobLen * numJobs];
+    Variable[] uaPredict = new Variable[jobLen * numJobs];
+    Variable[] x = new Variable[jobLen];
+    for (int i = 0; i < jobLen * numJobs; i++) {
+      oa[i] = new Variable("oa" + i).lower(BigDecimal.valueOf(0));
+      ua[i] = new Variable("ua" + i).lower(BigDecimal.valueOf(0));
+      uaPredict[i] = new Variable("uaPredict" + i).lower(BigDecimal.valueOf(0));
+    }
+    for (int i = 0; i < jobLen; i++) {
+      x[i] = new Variable("x").lower(BigDecimal.valueOf(0));
+    }
+    lpModel.addVariables(x);
+    lpModel.addVariables(oa);
+    lpModel.addVariables(ua);
+    lpModel.addVariables(uaPredict);
+    Variable eps = new Variable("epsilon").lower(BigDecimal.valueOf(0));
+    lpModel.addVariable(eps);
+
+    /** Set constraints. */
+    int indexJobITimeK = 0;
+    double cJobI = 0;
+    double cJobITimeK = 0;
+    ResourceSkyline resourceSkyline;
+    int[] containerNums;
+    // 1. sum(job_i){sum(timeK){1/cJobI * uaPredict_job_i_timeK}} <= numJobs
+    // * eps
+    Expression regularizationConstraint =
+        lpModel.addExpression("regularization");
+    regularizationConstraint.set(eps, -numJobs);
+    regularizationConstraint.upper(BigDecimal.valueOf(0)); // <= 0
+    for (int indexJobI = 0;
+         indexJobI < resourceSkylines.size(); indexJobI++) {
+      resourceSkyline = resourceSkylines.get(indexJobI);
+      // the # of containers consumed by job i in discretized time intervals
+      containerNums = preprocessor
+          .getDiscreteSkyline(resourceSkyline.getSkylineList(), timeInterval,
+              resourceSkyline.getContainerSpec().getMemorySize(), jobLen);
+      // the aggregated # of containers consumed by job i during its lifespan
+      cJobI = 0;
+      for (int i = 0; i < containerNums.length; i++) {
+        cJobI = cJobI + containerNums[i];
+      }
+      for (int timeK = 0; timeK < jobLen; timeK++) {
+        indexJobITimeK = indexJobI * jobLen + timeK;
+        // the # of containers consumed by job i in the k-th time interval
+        cJobITimeK = containerNums[timeK];
+        regularizationConstraint
+            .set(uaPredict[indexJobITimeK], 1 / cJobI);
+        generateOverAllocationConstraints(lpModel, cJobITimeK, oa, x,
+            indexJobITimeK, timeK);
+        generateUnderAllocationConstraints(lpModel, cJobITimeK, uaPredict,
+            ua, x, indexJobITimeK, timeK);
+      }
+    }
+
+    /** Set objective. */
+    Expression objective = lpModel.addExpression("objective");
+    generateObjective(objective, numJobs, jobLen, oa, ua, eps);
+
+    /** Solve the model. */
+    final Result lpResult = lpModel.minimise();
+    final TreeMap<Long, Resource> treeMap = new TreeMap<>();
+    RLESparseResourceAllocation result =
+        new RLESparseResourceAllocation(treeMap,
+            new DefaultResourceCalculator());
+    ReservationInterval riAdd;
+    Resource containerSpec = resourceSkylines.get(0).getContainerSpec();
+    String pipelineId =
+        ((RecurrenceId) jobHistory.keySet().toArray()[0]).getPipelineId();
+    Resource resource;
+    for (int indexTimeK = 0; indexTimeK < jobLen; indexTimeK++) {
+      riAdd = new ReservationInterval(indexTimeK * timeInterval,
+          (indexTimeK + 1) * timeInterval);
+      resource = Resource.newInstance(
+          containerSpec.getMemorySize() * (int) lpResult
+              .doubleValue(indexTimeK),
+          containerSpec.getVirtualCores() * (int) lpResult
+              .doubleValue(indexTimeK));
+      result.addInterval(riAdd, resource);
+      LOGGER.debug("time interval: {}, container: {}.", indexTimeK,
+          lpResult.doubleValue(indexTimeK));
+    }
+
+    predictionSkylineStore.addEstimation(pipelineId, result);
+
+    /**
+     * TODO: 1. We can calculate the estimated error (over-allocation,
+     * under-allocation) of our prediction which could be used to generate
+     * confidence level for our prediction; 2. Also, we can modify our model to
+     * take job input data size (and maybe stage info) into consideration; 3. We
+     * can also try to generate such conclusion: our prediction under-allocates
+     * X amount of resources from time 0 to time 100 compared with 95% of
+     * history runs; 4. We can build framework-specific versions of estimator
+     * (such as scope/spark/hive, etc.) and provides more specific suggestions.
+     * For example, we may say: for spark job i, its task size is X GB while the
+     * container memory allocation is Y GB; as a result, its shuffling stage is
+     * 20% slower than ideal case due to the disk spilling operations, etc. 5.
+     * If we have more information of jobs (other than ResourceSkyline), we may
+     * have such conclusion: job i is 20% slower than 90% of history runs, and
+     * it is because part of its tasks are running together with job j's tasks.
+     * In this case, we not only predict the amount of resource needed for job
+     * i, but also how to place the resource requirements to clusters; 6. We may
+     * monitor job progress, and dynamically increase/decrease container
+     * allocations to satisfy job deadline while minimizing the cost; 7. We may
+     * allow users to specify a budget (say $100 per job run), and optimize the
+     * resource allocation under the budget constraints. 8. ...
+     */
+    return result;
+  }
+
+  @Override public final void close() {
+    // TODO: currently place holder
+  }
+}
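
Collected from the constraint and objective code above, the model LpSolver hands to ojAlgo is the following small linear program (a sketch of the formulation, written out from the comments in the code; n = numJobs, K = jobLen, c_{i,k} is the observed container count of run i in interval k, C_i its total container count, x_k the predicted count, o/u the over-/under-allocation variables, u-hat the uaPredict variables, and epsilon the regularization variable):

    \begin{aligned}
    \min_{x,\,o,\,u,\,\hat{u},\,\varepsilon}\quad
      & \frac{\alpha}{n}\sum_{i=1}^{n}\sum_{k=0}^{K-1} o_{i,k}
        \;+\; \frac{1-\alpha}{n}\sum_{i=1}^{n} u_{i,K-1}
        \;+\; \beta\,\varepsilon \\
    \text{s.t.}\quad
      & o_{i,k} \;\ge\; x_k - c_{i,k}, \\
      & u_{i,0} \;\ge\; c_{i,0} - x_0, \qquad
        u_{i,k} \;\ge\; u_{i,k-1} + c_{i,k} - x_k \quad (k \ge 1), \\
      & \hat{u}_{i,k} + x_k \;\ge\; c_{i,k}, \qquad
        \sum_{i=1}^{n}\sum_{k=0}^{K-1} \frac{\hat{u}_{i,k}}{C_i} \;\le\; n\,\varepsilon, \\
      & x_k,\; o_{i,k},\; u_{i,k},\; \hat{u}_{i,k},\; \varepsilon \;\ge\; 0.
    \end{aligned}

The optimal x_k is then scaled by the container spec and written back as the RLESparseResourceAllocation that solve() returns and stores via addEstimation.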

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/impl/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Implementation for {@code Solver}.
+ */
+
+package org.apache.hadoop.resourceestimator.solver.impl;

+ 219 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/preprocess/SolverPreprocessor.java

@@ -0,0 +1,219 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.solver.preprocess;
+
+import static java.lang.Math.toIntExact;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.resourceestimator.solver.api.Solver;
+import org.apache.hadoop.resourceestimator.solver.exceptions.InvalidInputException;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval;
+import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Common preprocessing functions for {@link Solver}.
+ */
+public class SolverPreprocessor {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(SolverPreprocessor.class);
+
+  /**
+   * Check if Solver's input parameters are valid.
+   *
+   * @param jobHistory   the history {@link ResourceSkyline}s of the recurring
+   *                     pipeline job.
+   * @param timeInterval the time interval which is used to discretize the
+   *                     history {@link ResourceSkyline}s.
+   * @throws InvalidInputException if: (1) jobHistory is <em>null</em>;
+   *     (2) jobHistory is empty; (3) timeInterval is non-positive;
+   */
+  public final void validate(
+      final Map<RecurrenceId, List<ResourceSkyline>> jobHistory,
+      final int timeInterval) throws InvalidInputException {
+    if ((jobHistory == null) || (jobHistory.size() == 0)) {
+      LOGGER.error(
+          "Job resource skyline history is invalid, please try again with"
+              + " valid resource skyline history.");
+      throw new InvalidInputException("Job ResourceSkyline history", "invalid");
+    }
+
+    if (timeInterval <= 0) {
+      LOGGER.error(
+          "Solver timeInterval {} is invalid, please specify a positive value.",
+          timeInterval);
+      throw new InvalidInputException("Solver timeInterval", "non-positive");
+    }
+  }
+
+  /**
+   * Return the number of containers used by the job at the specified time,
+   * derived from its memory consumption.
+   *
+   * @param skyList           the list of {@link Resource}s used by the job.
+   * @param index             the discretized time index.
+   * @param containerMemAlloc the amount of memory allocated to one container.
+   * @return the number of containers used by the job at the specified time.
+   */
+  public final long getResourceVector(final RLESparseResourceAllocation skyList,
+      final int index, final long containerMemAlloc) {
+    return skyList.getCapacityAtTime(index).getMemorySize() / containerMemAlloc;
+  }
+
+  /**
+   * Discretize the job's lifespan into intervals, and return the number of
+   * containers used by the job within each interval.
+   * <p> Note that here we assume all containers allocated to the job have the
+   * same {@link Resource}. This is due to the limit of
+   * {@link RLESparseResourceAllocation}.
+   *
+   * @param skyList           the list of {@link Resource}s used by the job.
+   * @param timeInterval      the time interval used to discretize the job's
+   *                          lifespan.
+   * @param containerMemAlloc the amount of memory allocated to each container.
+   * @param jobLen            the duration of the job.
+   * @return the number of containers allocated to the job within discretized
+   * time intervals.
+   */
+  public final int[] getDiscreteSkyline(
+      final RLESparseResourceAllocation skyList, final int timeInterval,
+      final long containerMemAlloc, final int jobLen) {
+    long jobLifeSpan =
+        skyList.getLatestNonNullTime() - skyList.getEarliestStartTime();
+    int[] result = new int[jobLen];
+    Arrays.fill(result, 0);
+
+    int index = 0;
+    long numContainerAt = 0;
+    for (int i = 0; i < jobLifeSpan; i++) {
+      index = (int) Math.floor((double) i / timeInterval);
+      numContainerAt = getResourceVector(skyList, i, containerMemAlloc);
+      if (result[index] < numContainerAt) {
+        result[index] = (int) numContainerAt;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Merge different jobs' resource skylines into one within the same pipeline.
+   *
+   * @param resourceSkylines different jobs' resource skylines within the same
+   *                         pipeline.
+   * @return an aggregated resource skyline for the pipeline.
+   */
+  public final ResourceSkyline mergeSkyline(
+      final List<ResourceSkyline> resourceSkylines) {
+    // TODO:
+    // rewrite this function with shift and merge once YARN-5328 is committed
+    /** First, get the pipeline submission time. */
+    long pipelineSubmission = Long.MAX_VALUE;
+    for (int i = 0; i < resourceSkylines.size(); i++) {
+      long jobSubmission = resourceSkylines.get(i).getJobSubmissionTime();
+      if (pipelineSubmission > jobSubmission) {
+        pipelineSubmission = jobSubmission;
+      }
+    }
+    final TreeMap<Long, Resource> resourceOverTime = new TreeMap<>();
+    final RLESparseResourceAllocation skylineListAgg =
+        new RLESparseResourceAllocation(resourceOverTime,
+            new DefaultResourceCalculator());
+    /**
+     * Second, adjust different jobs' ResourceSkyline starting time based on
+     * pipeline submission time, and merge them into one ResourceSkyline.
+     */
+    for (int i = 0; i < resourceSkylines.size(); i++) {
+      long jobSubmission = resourceSkylines.get(i).getJobSubmissionTime();
+      long diff = (jobSubmission - pipelineSubmission) / 1000;
+      RLESparseResourceAllocation tmp =
+          resourceSkylines.get(i).getSkylineList();
+      Object[] timePoints = tmp.getCumulative().keySet().toArray();
+      for (int j = 0; j < timePoints.length - 2; j++) {
+        ReservationInterval riAdd =
+            new ReservationInterval(toIntExact((long) timePoints[j]) + diff,
+                toIntExact((long) timePoints[j + 1] + diff));
+        skylineListAgg.addInterval(riAdd,
+            tmp.getCapacityAtTime(toIntExact((long) timePoints[j])));
+      }
+    }
+    ResourceSkyline skylineAgg =
+        new ResourceSkyline(resourceSkylines.get(0).getJobId(),
+            resourceSkylines.get(0).getJobInputDataSize(),
+            resourceSkylines.get(0).getJobSubmissionTime(),
+            resourceSkylines.get(0).getJobFinishTime(),
+            resourceSkylines.get(0).getContainerSpec(), skylineListAgg);
+
+    return skylineAgg;
+  }
+
+  /**
+   * Aggregate all jobs' {@link ResourceSkyline}s within each run of the
+   * recurring pipeline, and return the aggregated {@link ResourceSkyline}s of
+   * the different runs.
+   *
+   * @param jobHistory the history {@link ResourceSkyline} of the recurring
+   *                   pipeline job.
+   * @param minJobRuns the minimum number of job runs required to run the
+   *                   solver.
+   * @return the aggregated {@link ResourceSkyline}s in different runs.
+   * @throws InvalidInputException if: (1) job submission time parsing fails;
+   *     (2) jobHistory has less job runs than the minimum requirement;
+   */
+  public final List<ResourceSkyline> aggregateSkylines(
+      final Map<RecurrenceId, List<ResourceSkyline>> jobHistory,
+      final int minJobRuns) throws InvalidInputException {
+    List<ResourceSkyline> resourceSkylines = new ArrayList<ResourceSkyline>();
+    for (Map.Entry<RecurrenceId, List<ResourceSkyline>> entry : jobHistory
+        .entrySet()) {
+      // TODO: identify different jobs within the same pipeline. Right now, we
+      // do prediction at the granularity of the pipeline, i.e., we merge the
+      // resource skylines of jobs within the same pipeline into one aggregated
+      // resource skyline.
+      ResourceSkyline skylineAgg = mergeSkyline(entry.getValue());
+      resourceSkylines.add(skylineAgg);
+    }
+    int numJobs = resourceSkylines.size();
+    if (numJobs < minJobRuns) {
+      LOGGER.error(
+          "Solver requires job resource skyline history for at least {} runs,"
+              + " but it only receives history info for {}  runs.",
+          minJobRuns, numJobs);
+      throw new InvalidInputException("Job ResourceSkyline history",
+          "containing less job runs" + " than " + minJobRuns);
+    }
+
+    return resourceSkylines;
+  }
+}
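
getDiscreteSkyline above converts the per-second memory skyline into container counts (memory divided by one container's memory) and keeps the maximum count seen inside each timeInterval bucket. A small sketch of that behaviour with illustrative values:

    import java.util.Arrays;
    import java.util.TreeMap;

    import org.apache.hadoop.resourceestimator.solver.preprocess.SolverPreprocessor;
    import org.apache.hadoop.yarn.api.records.Resource;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
    import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval;
    import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;

    public final class DiscreteSkylineSketch {
      public static void main(String[] args) {
        RLESparseResourceAllocation skyline = new RLESparseResourceAllocation(
            new TreeMap<Long, Resource>(), new DefaultResourceCalculator());
        // 3 containers (3 GB) for seconds [0, 7), then 1 container for [7, 10).
        skyline.addInterval(new ReservationInterval(0, 7),
            Resource.newInstance(3 * 1024, 3));
        skyline.addInterval(new ReservationInterval(7, 10),
            Resource.newInstance(1024, 1));

        int[] perInterval = new SolverPreprocessor().getDiscreteSkyline(
            skyline, 5 /* timeInterval */, 1024 /* containerMemAlloc */, 2 /* jobLen */);
        // Bucket 0 covers seconds 0-4 (max 3 containers); bucket 1 covers 5-9,
        // where seconds 5 and 6 still use 3 containers, so its max is also 3.
        System.out.println(Arrays.toString(perInterval));   // prints [3, 3]
      }
    }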

+ 23 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/solver/preprocess/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Preprocessor for {@code Solver}.
+ */
+
+package org.apache.hadoop.resourceestimator.solver.preprocess;

+ 163 - 0
hadoop-tools/hadoop-resourceestimator/src/main/java/org/apache/hadoop/resourceestimator/translator/api/JobMetaData.java

@@ -0,0 +1,163 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.resourceestimator.translator.api;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
+import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation;
+import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval;
+import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Job metadata collected when parsing the log file.
+ */
+public class JobMetaData {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(JobMetaData.class);
+  private final ResourceSkyline resourceSkyline = new ResourceSkyline();
+  // containerId -> startTime
+  private final Map<String, Long> rawStart = new HashMap<String, Long>();
+  // containerId -> releaseTime
+  private final Map<String, Long> rawEnd = new HashMap<String, Long>();
+  private RecurrenceId recurrenceId;
+
+  /**
+   * Constructor.
+   *
+   * @param jobSubmissionTimeConfig job submission time.
+   */
+  public JobMetaData(final long jobSubmissionTimeConfig) {
+    resourceSkyline.setJobSubmissionTime(jobSubmissionTimeConfig);
+  }
+
+  /**
+   * Set job finish time.
+   *
+   * @param jobFinishTimeConfig job finish time.
+   * @return the reference to current {@link JobMetaData}.
+   */
+  public final JobMetaData setJobFinishTime(final long jobFinishTimeConfig) {
+    resourceSkyline.setJobFinishTime(jobFinishTimeConfig);
+    return this;
+  }
+
+  /**
+   * Add container launch time.
+   *
+   * @param containerId id of the container.
+   * @param time        container launch time.
+   * @return the reference to current {@link JobMetaData}.
+   */
+  public final JobMetaData setContainerStart(final String containerId,
+      final long time) {
+    if (rawStart.put(containerId, time) != null) {
+      LOGGER.warn("find duplicate container launch time for {}, so we replace"
+              + " it with {}.", containerId, time);
+    }
+    return this;
+  }
+
+  /**
+   * Add container release time.
+   *
+   * @param containerId id of the container.
+   * @param time        container release time.
+   * @return the reference to current {@link JobMetaData}.
+   */
+  public final JobMetaData setContainerEnd(final String containerId,
+      final long time) {
+    if (rawEnd.put(containerId, time) != null) {
+      LOGGER.warn("find duplicate container release time for {}, so we replace"
+          + " it with {}.", containerId, time);
+    }
+    return this;
+  }
+
+  /**
+   * Get {@link RecurrenceId}.
+   *
+   * @return {@link RecurrenceId}.
+   */
+  public final RecurrenceId getRecurrenceId() {
+    return recurrenceId;
+  }
+
+  /**
+   * Set {@link RecurrenceId}.
+   *
+   * @param recurrenceIdConfig the {@link RecurrenceId}.
+   * @return the reference to current {@link JobMetaData}.
+   */
+  public final JobMetaData setRecurrenceId(
+      final RecurrenceId recurrenceIdConfig) {
+    this.recurrenceId = recurrenceIdConfig;
+    return this;
+  }
+
+  /**
+   * Get {@link ResourceSkyline}.
+   *
+   * @return {@link ResourceSkyline}.
+   */
+  public final ResourceSkyline getResourceSkyline() {
+    return resourceSkyline;
+  }
+
+  /**
+   * Normalize container launch/release times, and generate the
+   * {@link ResourceSkyline}.
+   */
+  public final void createSkyline() {
+    final long jobSubmissionTime = resourceSkyline.getJobSubmissionTime();
+    Resource containerSpec = resourceSkyline.getContainerSpec();
+    final TreeMap<Long, Resource> resourceOverTime = new TreeMap<>();
+    final RLESparseResourceAllocation skylineList =
+        new RLESparseResourceAllocation(resourceOverTime,
+            new DefaultResourceCalculator());
+    resourceSkyline.setSkylineList(skylineList);
+    if (containerSpec == null) {
+      // if RmParser fails to extract the container resource spec from logs, we
+      // statically set it to <1 core, 1GB>
+      containerSpec = Resource.newInstance(1024, 1);
+    }
+    resourceSkyline.setContainerSpec(containerSpec);
+    for (final Map.Entry<String, Long> entry : rawStart.entrySet()) {
+      final long timeStart = entry.getValue();
+      final Long timeEnd = rawEnd.get(entry.getKey());
+      if (timeEnd == null) {
+        LOGGER.warn("container release time not found for {}.", entry.getKey());
+      } else {
+        final ReservationInterval riAdd =
+            new ReservationInterval((timeStart - jobSubmissionTime) / 1000,
+                (timeEnd - jobSubmissionTime) / 1000);
+        resourceSkyline.getSkylineList().addInterval(riAdd, containerSpec);
+      }
+    }
+  }
+}
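
A short sketch of how a log parser would drive JobMetaData: record the submission time, add container start/end events as they are parsed, optionally set the container spec, and finally call createSkyline() to normalize everything to seconds since submission. The RecurrenceId constructor arguments (assumed to be pipelineId and runId) and all timestamps below are illustrative:

    import org.apache.hadoop.resourceestimator.common.api.RecurrenceId;
    import org.apache.hadoop.resourceestimator.common.api.ResourceSkyline;
    import org.apache.hadoop.resourceestimator.translator.api.JobMetaData;
    import org.apache.hadoop.yarn.api.records.Resource;

    public final class JobMetaDataSketch {
      public static void main(String[] args) {
        long submission = 1_000_000L;   // ms, illustrative
        JobMetaData meta = new JobMetaData(submission)
            .setRecurrenceId(new RecurrenceId("tpch_q12", "run_0"))
            .setContainerStart("container_01", submission + 2_000)
            .setContainerEnd("container_01", submission + 12_000)
            .setJobFinishTime(submission + 15_000);
        meta.getResourceSkyline().setContainerSpec(Resource.newInstance(1024, 1));

        meta.createSkyline();   // the container contributes the interval [2, 12) in seconds
        ResourceSkyline skyline = meta.getResourceSkyline();
        System.out.println(skyline.getSkylineList());
      }
    }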

Some files were not shown because too many files have changed