Browse source code

Merge branch 'trunk' into ozone-0.4.1. This is the second merge to pick up an NPE fix in trunk.

Anu Engineer 5 years ago
parent
commit
fc3bfdc535
70 changed files with 2139 additions and 978 deletions
  1. 3 3
      hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java
  2. 1 0
      hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientMetrics.java
  3. 1 2
      hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java
  4. 5 0
      hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
  5. 13 1
      hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStore.java
  6. 9 0
      hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStore.java
  7. 73 13
      hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TypedTable.java
  8. 58 0
      hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/CacheResult.java
  9. 28 3
      hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/TableCache.java
  10. 69 10
      hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/TableCacheImpl.java
  11. 51 20
      hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/cache/TestTableCacheImpl.java
  12. 7 2
      hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java
  13. 25 6
      hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
  14. 20 35
      hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
  15. 3 1
      hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java
  16. 17 8
      hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java
  17. 1 1
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml
  18. 28 0
      hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java
  19. 62 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java
  20. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  21. 92 54
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
  22. 4 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
  23. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsQuotaAdminGuide.md
  24. 5 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
  25. 55 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
  26. 34 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
  27. 4 4
      hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml
  28. 3 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java
  29. 3 0
      hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/audit/OMAction.java
  30. 2 0
      hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java
  31. 0 49
      hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmDeleteVolumeResponse.java
  32. 0 56
      hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmVolumeOwnerChangeResponse.java
  33. 0 77
      hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerHAProtocol.java
  34. 13 7
      hadoop-ozone/common/src/main/proto/OzoneManagerProtocol.proto
  35. 156 0
      hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java
  36. 8 44
      hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java
  37. 168 0
      hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java
  38. 31 69
      hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java
  39. 3 3
      hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java
  40. 0 3
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/BucketManagerImpl.java
  41. 24 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
  42. 5 2
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
  43. 0 77
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
  44. 3 10
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/S3BucketManagerImpl.java
  45. 4 46
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManager.java
  46. 13 89
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManagerImpl.java
  47. 1 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
  48. 3 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java
  49. 11 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java
  50. 361 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/bucket/S3BucketCreateRequest.java
  51. 23 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/bucket/package-info.java
  52. 12 24
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeCreateRequest.java
  53. 1 3
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeDeleteRequest.java
  54. 40 5
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeRequest.java
  55. 1 3
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetOwnerRequest.java
  56. 1 2
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetQuotaRequest.java
  57. 73 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/bucket/S3BucketCreateResponse.java
  58. 24 0
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/bucket/package-info.java
  59. 0 11
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerHARequestHandler.java
  60. 1 168
      hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerHARequestHandlerImpl.java
  61. 20 0
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/TestOMRequestUtils.java
  62. 246 0
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/bucket/TestS3BucketCreateRequest.java
  63. 23 0
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/bucket/package-info.java
  64. 25 2
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeCreateRequest.java
  65. 6 12
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeDeleteRequest.java
  66. 119 0
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/bucket/TestS3BucketCreateResponse.java
  67. 23 0
      hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/bucket/package-info.java
  68. 2 39
      hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java
  69. 7 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
  70. 4 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md

+ 3 - 3
hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientGrpc.java

@@ -285,7 +285,7 @@ public class XceiverClientGrpc extends XceiverClientSpi {
         }
         break;
       } catch (ExecutionException | InterruptedException | IOException e) {
-        LOG.debug("Failed to execute command " + request + " on datanode " + dn
+        LOG.error("Failed to execute command " + request + " on datanode " + dn
             .getUuidString(), e);
         if (!(e instanceof IOException)) {
           if (Status.fromThrowable(e.getCause()).getCode()
@@ -306,8 +306,8 @@ public class XceiverClientGrpc extends XceiverClientSpi {
       return reply;
     } else {
       Preconditions.checkNotNull(ioException);
-      LOG.error("Failed to execute command " + request + " on the pipeline "
-          + pipeline.getId());
+      LOG.error("Failed to execute command {} on the pipeline {}.", request,
+          pipeline);
       throw ioException;
     }
   }

+ 1 - 0
hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientMetrics.java

@@ -69,6 +69,7 @@ public class XceiverClientMetrics {
   }
 
   public static XceiverClientMetrics create() {
+    DefaultMetricsSystem.initialize(SOURCE_NAME);
     MetricsSystem ms = DefaultMetricsSystem.instance();
     return ms.register(SOURCE_NAME, "Storage Container Client Metrics",
         new XceiverClientMetrics());

+ 1 - 2
hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java

@@ -42,7 +42,6 @@ import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
-import java.util.Collections;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Map;
@@ -160,7 +159,7 @@ public class BlockOutputStream extends OutputStream {
     bufferList = null;
     totalDataFlushedLength = 0;
     writtenDataLength = 0;
-    failedServers = Collections.emptyList();
+    failedServers = new ArrayList<>(0);
     ioException = new AtomicReference<>(null);
   }
 

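A brief note on the failedServers change above: Collections.emptyList() returns an immutable list, so if the stream later records failed datanodes by appending to this field, that call would throw UnsupportedOperationException; the patch swaps in a small mutable list instead. A minimal sketch of the difference (the datanode name is a placeholder):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

class FailedServersSketch {
  void sketch() {
    List<String> immutable = Collections.emptyList();
    // immutable.add("dn-1");                  // would throw UnsupportedOperationException

    List<String> failedServers = new ArrayList<>(0); // mutable, as in the patch
    failedServers.add("dn-1");                 // safe to append later
  }
}
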
+ 5 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java

@@ -264,6 +264,7 @@ public final class OzoneConsts {
   public static final String UPLOAD_ID = "uploadID";
   public static final String PART_NUMBER_MARKER = "partNumberMarker";
   public static final String MAX_PARTS = "maxParts";
+  public static final String S3_BUCKET = "s3Bucket";
 
 
 
@@ -303,4 +304,8 @@ public final class OzoneConsts {
   public static final String JAVA_TMP_DIR = "java.io.tmpdir";
   public static final String LOCALHOST = "localhost";
 
+
+  public static final int S3_BUCKET_MIN_LENGTH = 3;
+  public static final int S3_BUCKET_MAX_LENGTH = 64;
+
 }

+ 13 - 1
hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBStore.java

@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.utils.db.cache.TableCacheImpl;
 
 /**
  * The DBStore interface provides the ability to create Tables, which store
@@ -47,7 +48,9 @@ public interface DBStore extends AutoCloseable {
 
 
   /**
-   * Gets an existing TableStore with implicit key/value conversion.
+   * Gets an existing TableStore with implicit key/value conversion and
+   * with default cleanup policy for cache. Default cache clean up policy is
+   * manual.
    *
    * @param name - Name of the TableStore to get
    * @param keyType
@@ -58,6 +61,15 @@ public interface DBStore extends AutoCloseable {
   <KEY, VALUE> Table<KEY, VALUE> getTable(String name,
       Class<KEY> keyType, Class<VALUE> valueType) throws IOException;
 
+  /**
+   * Gets an existing TableStore with implicit key/value conversion and
+   * with specified cleanup policy for cache.
+   * @throws IOException
+   */
+  <KEY, VALUE> Table<KEY, VALUE> getTable(String name,
+      Class<KEY> keyType, Class<VALUE> valueType,
+      TableCacheImpl.CacheCleanupPolicy cleanupPolicy) throws IOException;
+
   /**
    * Lists the Known list of Tables in a DB.
    *

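For context, a hedged sketch of how a caller might use the new overload to request a table whose cache is never evicted; only the getTable signature comes from the interface above, while the store variable, the table name, and the String key/value types (which assume matching codecs are registered) are illustrative assumptions:

import java.io.IOException;

import org.apache.hadoop.utils.db.DBStore;
import org.apache.hadoop.utils.db.Table;
import org.apache.hadoop.utils.db.cache.TableCacheImpl.CacheCleanupPolicy;

class FullCacheTableSketch {
  Table<String, String> openVolumeTable(DBStore store) throws IOException {
    // NEVER keeps every entry cached (a full-table cache); the existing
    // three-argument overload defaults to MANUAL (partial cache).
    return store.getTable("volumeTable", String.class, String.class,
        CacheCleanupPolicy.NEVER);
  }
}
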
+ 9 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBStore.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.utils.RocksDBStoreMBean;
 
 import com.google.common.base.Preconditions;
+import org.apache.hadoop.utils.db.cache.TableCacheImpl;
 import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.rocksdb.ColumnFamilyDescriptor;
 import org.rocksdb.ColumnFamilyHandle;
@@ -260,6 +261,14 @@ public class RDBStore implements DBStore {
         valueType);
   }
 
+  @Override
+  public <KEY, VALUE> Table<KEY, VALUE> getTable(String name,
+      Class<KEY> keyType, Class<VALUE> valueType,
+      TableCacheImpl.CacheCleanupPolicy cleanupPolicy) throws IOException {
+    return new TypedTable<KEY, VALUE>(getTable(name), codecRegistry, keyType,
+        valueType, cleanupPolicy);
+  }
+
   @Override
   public ArrayList<Table> listTables() throws IOException {
     ArrayList<Table> returnList = new ArrayList<>();

+ 73 - 13
hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/TypedTable.java

@@ -23,11 +23,16 @@ import java.util.Iterator;
 import java.util.Map;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Optional;
 import org.apache.hadoop.utils.db.cache.CacheKey;
+import org.apache.hadoop.utils.db.cache.CacheResult;
 import org.apache.hadoop.utils.db.cache.CacheValue;
-import org.apache.hadoop.utils.db.cache.PartialTableCache;
+import org.apache.hadoop.utils.db.cache.TableCacheImpl;
 import org.apache.hadoop.utils.db.cache.TableCache;
+import org.apache.hadoop.utils.db.cache.TableCacheImpl.CacheCleanupPolicy;
 
+import static org.apache.hadoop.utils.db.cache.CacheResult.CacheStatus.EXISTS;
+import static org.apache.hadoop.utils.db.cache.CacheResult.CacheStatus.NOT_EXIST;
 /**
  * Strongly typed table implementation.
  * <p>
@@ -49,16 +54,61 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
 
   private final TableCache<CacheKey<KEY>, CacheValue<VALUE>> cache;
 
+  private final static long EPOCH_DEFAULT = -1L;
 
+  /**
+   * Create an TypedTable from the raw table.
+   * Default cleanup policy used for the table is
+   * {@link CacheCleanupPolicy#MANUAL}.
+   * @param rawTable
+   * @param codecRegistry
+   * @param keyType
+   * @param valueType
+   */
+  public TypedTable(
+      Table<byte[], byte[]> rawTable,
+      CodecRegistry codecRegistry, Class<KEY> keyType,
+      Class<VALUE> valueType) throws IOException {
+    this(rawTable, codecRegistry, keyType, valueType,
+        CacheCleanupPolicy.MANUAL);
+  }
+
+  /**
+   * Create an TypedTable from the raw table with specified cleanup policy
+   * for table cache.
+   * @param rawTable
+   * @param codecRegistry
+   * @param keyType
+   * @param valueType
+   * @param cleanupPolicy
+   */
   public TypedTable(
       Table<byte[], byte[]> rawTable,
       CodecRegistry codecRegistry, Class<KEY> keyType,
-      Class<VALUE> valueType) {
+      Class<VALUE> valueType,
+      TableCacheImpl.CacheCleanupPolicy cleanupPolicy) throws IOException {
     this.rawTable = rawTable;
     this.codecRegistry = codecRegistry;
     this.keyType = keyType;
     this.valueType = valueType;
-    cache = new PartialTableCache<>();
+    cache = new TableCacheImpl<>(cleanupPolicy);
+
+    if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
+      //fill cache
+      try(TableIterator<KEY, ? extends KeyValue<KEY, VALUE>> tableIterator =
+              iterator()) {
+
+        while (tableIterator.hasNext()) {
+          KeyValue< KEY, VALUE > kv = tableIterator.next();
+
+          // We should build cache after OM restart when clean up policy is
+          // NEVER. Setting epoch value -1, so that when it is marked for
+          // delete, this will be considered for cleanup.
+          cache.put(new CacheKey<>(kv.getKey()),
+              new CacheValue<>(Optional.of(kv.getValue()), EPOCH_DEFAULT));
+        }
+      }
+    }
   }
 
   @Override
@@ -83,9 +133,17 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
 
   @Override
   public boolean isExist(KEY key) throws IOException {
-    CacheValue<VALUE> cacheValue= cache.get(new CacheKey<>(key));
-    return (cacheValue != null && cacheValue.getCacheValue() != null) ||
-        rawTable.isExist(codecRegistry.asRawData(key));
+
+    CacheResult<CacheValue<VALUE>> cacheResult =
+        cache.lookup(new CacheKey<>(key));
+
+    if (cacheResult.getCacheStatus() == EXISTS) {
+      return true;
+    } else if (cacheResult.getCacheStatus() == NOT_EXIST) {
+      return false;
+    } else {
+      return rawTable.isExist(codecRegistry.asRawData(key));
+    }
   }
 
   /**
@@ -104,14 +162,16 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
   public VALUE get(KEY key) throws IOException {
     // Here the metadata lock will guarantee that cache is not updated for same
     // key during get key.
-    CacheValue< VALUE > cacheValue = cache.get(new CacheKey<>(key));
-    if (cacheValue == null) {
-      // If no cache for the table or if it does not exist in cache get from
-      // RocksDB table.
-      return getFromTable(key);
+
+    CacheResult<CacheValue<VALUE>> cacheResult =
+        cache.lookup(new CacheKey<>(key));
+
+    if (cacheResult.getCacheStatus() == EXISTS) {
+      return cacheResult.getValue().getCacheValue();
+    } else if (cacheResult.getCacheStatus() == NOT_EXIST) {
+      return null;
     } else {
-      // We have a value in cache, return the value.
-      return cacheValue.getCacheValue();
+      return getFromTable(key);
     }
   }
 

+ 58 - 0
hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/CacheResult.java

@@ -0,0 +1,58 @@
+package org.apache.hadoop.utils.db.cache;
+
+import java.util.Objects;
+
+/**
+ * CacheResult which is returned as response for Key exist in cache or not.
+ * @param <CACHEVALUE>
+ */
+public class CacheResult<CACHEVALUE extends CacheValue> {
+
+  private CacheStatus cacheStatus;
+  private CACHEVALUE cachevalue;
+
+  public CacheResult(CacheStatus cacheStatus, CACHEVALUE cachevalue) {
+    this.cacheStatus = cacheStatus;
+    this.cachevalue = cachevalue;
+  }
+
+  public CacheStatus getCacheStatus() {
+    return cacheStatus;
+  }
+
+  public CACHEVALUE getValue() {
+    return cachevalue;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    CacheResult< ? > that = (CacheResult< ? >) o;
+    return cacheStatus == that.cacheStatus &&
+        Objects.equals(cachevalue, that.cachevalue);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(cacheStatus, cachevalue);
+  }
+
+  /**
+   * Status which tells whether key exists in cache or not.
+   */
+  public enum CacheStatus {
+    EXISTS, // When key exists in cache.
+
+    NOT_EXIST, // We guarantee that it does not exist. This will be returned
+    // when the key does not exist in cache, when cache clean up policy is
+    // NEVER.
+    MAY_EXIST  // This will be returned when the key does not exist in
+    // cache, when cache clean up policy is MANUAL. So caller need to check
+    // if it might exist in it's rocksdb table.
+  }
+}

+ 28 - 3
hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/TableCache.java

@@ -21,7 +21,8 @@ package org.apache.hadoop.utils.db.cache;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
-
+import org.apache.hadoop.utils.db.cache.CacheResult.CacheStatus;
+import org.apache.hadoop.utils.db.cache.TableCacheImpl.CacheCleanupPolicy;
 import java.util.Iterator;
 import java.util.Map;
 
@@ -52,8 +53,11 @@ public interface TableCache<CACHEKEY extends CacheKey,
 
   /**
    * Removes all the entries from the cache which are having epoch value less
-   * than or equal to specified epoch value. For FullTable Cache this is a
-   * do-nothing operation.
+   * than or equal to specified epoch value.
+   *
+   * If clean up policy is NEVER, this is a do nothing operation.
+   * If clean up policy is MANUAL, it is caller responsibility to cleanup the
+   * cache before calling cleanup.
    * @param epoch
    */
   void cleanup(long epoch);
@@ -69,4 +73,25 @@ public interface TableCache<CACHEKEY extends CacheKey,
    * @return iterator of the underlying cache for the table.
    */
   Iterator<Map.Entry<CACHEKEY, CACHEVALUE>> iterator();
+
+  /**
+   * Check key exist in cache or not.
+   *
+   * If it exists return CacheResult with value and status as
+   * {@link CacheStatus#EXISTS}
+   *
+   * If it does not exist:
+   *  If cache clean up policy is
+   *  {@link TableCacheImpl.CacheCleanupPolicy#NEVER} it means table cache is
+   *  full cache. It return's {@link CacheResult} with null
+   *  and status as {@link CacheStatus#NOT_EXIST}.
+   *
+   *  If cache clean up policy is {@link CacheCleanupPolicy#MANUAL} it means
+   *  table cache is partial cache. It return's {@link CacheResult} with
+   *  null and status as MAY_EXIST.
+   *
+   * @param cachekey
+   */
+  CacheResult<CACHEVALUE> lookup(CACHEKEY cachekey);
+
 }

+ 69 - 10
hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/PartialTableCache.java → hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/cache/TableCacheImpl.java

@@ -32,21 +32,28 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 
 /**
- * Cache implementation for the table, this cache is partial cache, this will
- * be cleaned up, after entries are flushed to DB.
+ * Cache implementation for the table. Depending on the cache clean up policy
+ * this cache will be full cache or partial cache.
+ *
+ * If cache cleanup policy is set as {@link CacheCleanupPolicy#MANUAL},
+ * this will be a partial cache.
+ *
+ * If cache cleanup policy is set as {@link CacheCleanupPolicy#NEVER},
+ * this will be a full cache.
  */
 @Private
 @Evolving
-public class PartialTableCache<CACHEKEY extends CacheKey,
+public class TableCacheImpl<CACHEKEY extends CacheKey,
     CACHEVALUE extends CacheValue> implements TableCache<CACHEKEY, CACHEVALUE> {
 
   private final ConcurrentHashMap<CACHEKEY, CACHEVALUE> cache;
   private final TreeSet<EpochEntry<CACHEKEY>> epochEntries;
   private ExecutorService executorService;
+  private CacheCleanupPolicy cleanupPolicy;
 
 
 
-  public PartialTableCache() {
+  public TableCacheImpl(CacheCleanupPolicy cleanupPolicy) {
     cache = new ConcurrentHashMap<>();
     epochEntries = new TreeSet<>();
     // Created a singleThreadExecutor, so one cleanup will be running at a
@@ -54,7 +61,7 @@ public class PartialTableCache<CACHEKEY extends CacheKey,
     ThreadFactory build = new ThreadFactoryBuilder().setDaemon(true)
         .setNameFormat("PartialTableCache Cleanup Thread - %d").build();
     executorService = Executors.newSingleThreadExecutor(build);
-
+    this.cleanupPolicy = cleanupPolicy;
   }
 
   @Override
@@ -70,7 +77,7 @@ public class PartialTableCache<CACHEKEY extends CacheKey,
 
   @Override
   public void cleanup(long epoch) {
-    executorService.submit(() -> evictCache(epoch));
+    executorService.submit(() -> evictCache(epoch, cleanupPolicy));
   }
 
   @Override
@@ -83,16 +90,24 @@ public class PartialTableCache<CACHEKEY extends CacheKey,
     return cache.entrySet().iterator();
   }
 
-  private void evictCache(long epoch) {
+  private void evictCache(long epoch, CacheCleanupPolicy cacheCleanupPolicy) {
     EpochEntry<CACHEKEY> currentEntry = null;
     for (Iterator<EpochEntry<CACHEKEY>> iterator = epochEntries.iterator();
          iterator.hasNext();) {
       currentEntry = iterator.next();
       CACHEKEY cachekey = currentEntry.getCachekey();
       CacheValue cacheValue = cache.computeIfPresent(cachekey, ((k, v) -> {
-        if (v.getEpoch() <= epoch) {
-          iterator.remove();
-          return null;
+        if (cleanupPolicy == CacheCleanupPolicy.MANUAL) {
+          if (v.getEpoch() <= epoch) {
+            iterator.remove();
+            return null;
+          }
+        } else if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
+          // Remove only entries which are marked for delete.
+          if (v.getEpoch() <= epoch && v.getCacheValue() == null) {
+            iterator.remove();
+            return null;
+          }
         }
         return v;
       }));
@@ -103,4 +118,48 @@ public class PartialTableCache<CACHEKEY extends CacheKey,
       }
     }
   }
+
+  public CacheResult<CACHEVALUE> lookup(CACHEKEY cachekey) {
+
+    // TODO: Remove this check once HA and Non-HA code is merged and all
+    //  requests are converted to use cache and double buffer.
+    // This is to done as temporary instead of passing ratis enabled flag
+    // which requires more code changes. We cannot use ratis enabled flag
+    // also because some of the requests in OM HA are not modified to use
+    // double buffer and cache.
+
+    if (cache.size() == 0) {
+      return new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST,
+          null);
+    }
+
+    CACHEVALUE cachevalue = cache.get(cachekey);
+    if (cachevalue == null) {
+      if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
+        return new CacheResult<>(CacheResult.CacheStatus.NOT_EXIST, null);
+      } else {
+        return new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST,
+            null);
+      }
+    } else {
+      if (cachevalue.getCacheValue() != null) {
+        return new CacheResult<>(CacheResult.CacheStatus.EXISTS, cachevalue);
+      } else {
+        // When entity is marked for delete, cacheValue will be set to null.
+        // In that case we can return NOT_EXIST irrespective of cache cleanup
+        // policy.
+        return new CacheResult<>(CacheResult.CacheStatus.NOT_EXIST, null);
+      }
+    }
+  }
+
+  /**
+   * Cleanup policies for table cache.
+   */
+  public enum CacheCleanupPolicy {
+    NEVER, // Cache will not be cleaned up. This mean's the table maintains
+    // full cache.
+    MANUAL // Cache will be cleaned up, once after flushing to DB. It is
+    // caller's responsibility to flush to DB, before calling cleanup cache.
+  }
 }

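To illustrate the lookup semantics above, a small hedged sketch; the String keys/values and epoch numbers are arbitrary, and representing a delete marker as an absent value is an assumption based on the eviction logic ("cacheValue will be set to null") rather than code shown here:

import com.google.common.base.Optional;

import org.apache.hadoop.utils.db.cache.CacheKey;
import org.apache.hadoop.utils.db.cache.CacheResult.CacheStatus;
import org.apache.hadoop.utils.db.cache.CacheValue;
import org.apache.hadoop.utils.db.cache.TableCache;
import org.apache.hadoop.utils.db.cache.TableCacheImpl;
import org.apache.hadoop.utils.db.cache.TableCacheImpl.CacheCleanupPolicy;

class TableCacheLookupSketch {
  void sketch() {
    TableCache<CacheKey<String>, CacheValue<String>> cache =
        new TableCacheImpl<>(CacheCleanupPolicy.NEVER);

    // Live entry at epoch 1; assumed delete marker (absent value) at epoch 2.
    cache.put(new CacheKey<>("vol1"),
        new CacheValue<>(Optional.of("args"), 1));
    cache.put(new CacheKey<>("vol2"),
        new CacheValue<>(Optional.<String>absent(), 2));

    CacheStatus s1 = cache.lookup(new CacheKey<>("vol1")).getCacheStatus();
    // s1 == EXISTS: live entry found in cache.
    CacheStatus s2 = cache.lookup(new CacheKey<>("vol2")).getCacheStatus();
    // s2 == NOT_EXIST: marked for delete, regardless of cleanup policy.
    CacheStatus s3 = cache.lookup(new CacheKey<>("vol3")).getCacheStatus();
    // s3 == NOT_EXIST under NEVER (full cache); a MANUAL (partial) cache
    // would return MAY_EXIST and the caller would fall back to RocksDB.
  }
}
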
+ 51 - 20
hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/cache/TestPartialTableCache.java → hadoop-hdds/common/src/test/java/org/apache/hadoop/utils/db/cache/TestTableCacheImpl.java

@@ -19,6 +19,8 @@
 
 package org.apache.hadoop.utils.db.cache;
 
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.concurrent.CompletableFuture;
 
 import com.google.common.base.Optional;
@@ -26,18 +28,40 @@ import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 
 import static org.junit.Assert.fail;
 
 /**
  * Class tests partial table cache.
  */
-public class TestPartialTableCache {
+@RunWith(value = Parameterized.class)
+public class TestTableCacheImpl {
   private TableCache<CacheKey<String>, CacheValue<String>> tableCache;
 
+  private final TableCacheImpl.CacheCleanupPolicy cacheCleanupPolicy;
+
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> policy() {
+    Object[][] params = new Object[][] {
+        {TableCacheImpl.CacheCleanupPolicy.NEVER},
+        {TableCacheImpl.CacheCleanupPolicy.MANUAL}
+    };
+    return Arrays.asList(params);
+  }
+
+  public TestTableCacheImpl(
+      TableCacheImpl.CacheCleanupPolicy cacheCleanupPolicy) {
+    this.cacheCleanupPolicy = cacheCleanupPolicy;
+  }
+
+
   @Before
   public void create() {
-    tableCache = new PartialTableCache<>();
+    tableCache =
+        new TableCacheImpl<>(cacheCleanupPolicy);
   }
   @Test
   public void testPartialTableCache() {
@@ -98,33 +122,40 @@ public class TestPartialTableCache {
           tableCache.get(new CacheKey<>(Integer.toString(i))).getCacheValue());
     }
 
-    int deleted = 5;
-    // cleanup first 5 entires
-    tableCache.cleanup(deleted);
 
     value = future.get();
     Assert.assertEquals(10, value);
 
     totalCount += value;
 
-    // We should totalCount - deleted entries in cache.
-    final int tc = totalCount;
-    GenericTestUtils.waitFor(() -> (tc - deleted == tableCache.size()), 100,
-        5000);
-
-    // Check if we have remaining entries.
-    for (int i=6; i <= totalCount; i++) {
-      Assert.assertEquals(Integer.toString(i),
-          tableCache.get(new CacheKey<>(Integer.toString(i))).getCacheValue());
+    if (cacheCleanupPolicy == TableCacheImpl.CacheCleanupPolicy.MANUAL) {
+      int deleted = 5;
+
+      // cleanup first 5 entires
+      tableCache.cleanup(deleted);
+
+      // We should totalCount - deleted entries in cache.
+      final int tc = totalCount;
+      GenericTestUtils.waitFor(() -> (tc - deleted == tableCache.size()), 100,
+          5000);
+      // Check if we have remaining entries.
+      for (int i=6; i <= totalCount; i++) {
+        Assert.assertEquals(Integer.toString(i), tableCache.get(
+            new CacheKey<>(Integer.toString(i))).getCacheValue());
+      }
+      tableCache.cleanup(10);
+
+      tableCache.cleanup(totalCount);
+
+      // Cleaned up all entries, so cache size should be zero.
+      GenericTestUtils.waitFor(() -> (0 == tableCache.size()), 100,
+          5000);
+    } else {
+      tableCache.cleanup(totalCount);
+      Assert.assertEquals(totalCount, tableCache.size());
     }
 
-    tableCache.cleanup(10);
-
-    tableCache.cleanup(totalCount);
 
-    // Cleaned up all entries, so cache size should be zero.
-    GenericTestUtils.waitFor(() -> (0 == tableCache.size()), 100,
-        5000);
   }
 
   private int writeToCache(int count, int startVal, long sleep)

+ 7 - 2
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java

@@ -80,6 +80,7 @@ public final class ContainerDataYaml {
   public static void createContainerFile(ContainerType containerType,
       ContainerData containerData, File containerFile) throws IOException {
     Writer writer = null;
+    FileOutputStream out = null;
     try {
       // Create Yaml for given container type
       Yaml yaml = getYamlForContainerType(containerType);
@@ -87,13 +88,17 @@ public final class ContainerDataYaml {
       containerData.computeAndSetChecksum(yaml);
 
       // Write the ContainerData with checksum to Yaml file.
-      writer = new OutputStreamWriter(new FileOutputStream(
-          containerFile), "UTF-8");
+      out = new FileOutputStream(
+          containerFile);
+      writer = new OutputStreamWriter(out, "UTF-8");
       yaml.dump(containerData, writer);
 
     } finally {
       try {
         if (writer != null) {
+          writer.flush();
+          // make sure the container metadata is synced to disk.
+          out.getFD().sync();
           writer.close();
         }
       } catch (IOException ex) {

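The change above follows the usual write-flush-sync pattern so the container metadata file survives a crash right after creation. A minimal standalone sketch of that pattern (file and payload are placeholders, not taken from ContainerDataYaml):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

class DurableWriteSketch {
  static void writeDurably(File file, String contents) throws IOException {
    try (FileOutputStream out = new FileOutputStream(file);
         Writer writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
      writer.write(contents);
      writer.flush();       // push buffered characters down to the stream
      out.getFD().sync();   // force the bytes to disk before returning
    }
  }
}
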
+ 25 - 6
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java

@@ -22,11 +22,13 @@ import com.google.common.base.Preconditions;
 import com.google.common.cache.Cache;
 import com.google.common.cache.CacheBuilder;
 import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
 
 import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerNotOpenException;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.ratis.proto.RaftProtos.RaftPeerRole;
@@ -82,6 +84,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.stream.Collectors;
 import java.util.Set;
 import java.util.concurrent.ConcurrentSkipListSet;
+import java.util.concurrent.Executors;
 import java.io.FileOutputStream;
 import java.io.FileInputStream;
 import java.io.OutputStream;
@@ -139,7 +142,6 @@ public class ContainerStateMachine extends BaseStateMachine {
   // keeps track of the containers created per pipeline
   private final Set<Long> createContainerSet;
   private ExecutorService[] executors;
-  private final int numExecutors;
   private final Map<Long, Long> applyTransactionCompletionMap;
   private final Cache<Long, ByteString> stateMachineDataCache;
   private final boolean isBlockTokenEnabled;
@@ -152,15 +154,13 @@ public class ContainerStateMachine extends BaseStateMachine {
   @SuppressWarnings("parameternumber")
   public ContainerStateMachine(RaftGroupId gid, ContainerDispatcher dispatcher,
       ThreadPoolExecutor chunkExecutor, XceiverServerRatis ratisServer,
-      List<ExecutorService> executors, long expiryInterval,
-      boolean isBlockTokenEnabled, TokenVerifier tokenVerifier) {
+      long expiryInterval, boolean isBlockTokenEnabled,
+      TokenVerifier tokenVerifier, Configuration conf) {
     this.gid = gid;
     this.dispatcher = dispatcher;
     this.chunkExecutor = chunkExecutor;
     this.ratisServer = ratisServer;
     metrics = CSMMetrics.create(gid);
-    this.numExecutors = executors.size();
-    this.executors = executors.toArray(new ExecutorService[numExecutors]);
     this.writeChunkFutureMap = new ConcurrentHashMap<>();
     applyTransactionCompletionMap = new ConcurrentHashMap<>();
     stateMachineDataCache = CacheBuilder.newBuilder()
@@ -171,6 +171,19 @@ public class ContainerStateMachine extends BaseStateMachine {
     this.isBlockTokenEnabled = isBlockTokenEnabled;
     this.tokenVerifier = tokenVerifier;
     this.createContainerSet = new ConcurrentSkipListSet<>();
+
+    final int numContainerOpExecutors = conf.getInt(
+        OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_KEY,
+        OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_DEFAULT);
+    this.executors = new ExecutorService[numContainerOpExecutors];
+    for (int i = 0; i < numContainerOpExecutors; i++) {
+      final int index = i;
+      this.executors[index] = Executors.newSingleThreadExecutor(r -> {
+        Thread t = new Thread(r);
+        t.setName("RatisApplyTransactionExecutor " + index);
+        return t;
+      });
+    }
   }
 
   @Override
@@ -249,6 +262,9 @@ public class ContainerStateMachine extends BaseStateMachine {
       LOG.info("{}: Taking a snapshot at:{} file {}", gid, ti, snapshotFile);
       try (FileOutputStream fos = new FileOutputStream(snapshotFile)) {
         persistContainerSet(fos);
+        fos.flush();
+        // make sure the snapshot file is synced
+        fos.getFD().sync();
       } catch (IOException ioe) {
         LOG.info("{}: Failed to write snapshot at:{} file {}", gid, ti,
             snapshotFile);
@@ -367,7 +383,7 @@ public class ContainerStateMachine extends BaseStateMachine {
 
   private ExecutorService getCommandExecutor(
       ContainerCommandRequestProto requestProto) {
-    int executorId = (int)(requestProto.getContainerID() % numExecutors);
+    int executorId = (int)(requestProto.getContainerID() % executors.length);
     return executors[executorId];
   }
 
@@ -700,5 +716,8 @@ public class ContainerStateMachine extends BaseStateMachine {
   @Override
   public void close() throws IOException {
     evictStateMachineCache();
+    for (ExecutorService executor : executors) {
+      executor.shutdown();
+    }
   }
 }

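The executor change above builds the pool inside the state machine and routes every request for a given container to the same single-thread executor (containerID modulo the pool size), so transactions for one container still apply in order while different containers proceed in parallel. A hedged sketch of that routing idea, with plain longs standing in for container IDs:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

class KeyAffinityExecutorSketch {
  private final ExecutorService[] executors;

  KeyAffinityExecutorSketch(int numExecutors) {
    executors = new ExecutorService[numExecutors];
    for (int i = 0; i < numExecutors; i++) {
      final int index = i;
      executors[i] = Executors.newSingleThreadExecutor(r -> {
        Thread t = new Thread(r);
        t.setName("ApplyTransactionExecutor " + index);
        return t;
      });
    }
  }

  // All tasks for the same containerId land on the same single-thread
  // executor, preserving per-container ordering.
  void submit(long containerId, Runnable task) {
    executors[(int) (containerId % executors.length)].submit(task);
  }

  void shutdown() {
    for (ExecutorService executor : executors) {
      executor.shutdown();
    }
  }
}
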
+ 20 - 35
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java

@@ -81,8 +81,6 @@ import java.util.Objects;
 import java.util.UUID;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
@@ -102,7 +100,6 @@ public final class XceiverServerRatis extends XceiverServer {
   private int port;
   private final RaftServer server;
   private ThreadPoolExecutor chunkExecutor;
-  private final List<ExecutorService> executors;
   private final ContainerDispatcher dispatcher;
   private ClientId clientId = ClientId.randomId();
   private final StateContext context;
@@ -111,16 +108,18 @@ public final class XceiverServerRatis extends XceiverServer {
   private final long cacheEntryExpiryInteval;
   private boolean isStarted = false;
   private DatanodeDetails datanodeDetails;
+  private final Configuration conf;
 
   private XceiverServerRatis(DatanodeDetails dd, int port,
       ContainerDispatcher dispatcher, Configuration conf, StateContext
       context, GrpcTlsConfig tlsConfig, CertificateClient caClient)
       throws IOException {
     super(conf, caClient);
+    this.conf = conf;
     Objects.requireNonNull(dd, "id == null");
     datanodeDetails = dd;
     this.port = port;
-    RaftProperties serverProperties = newRaftProperties(conf);
+    RaftProperties serverProperties = newRaftProperties();
     final int numWriteChunkThreads = conf.getInt(
         OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_KEY,
         OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_WRITE_CHUNK_THREADS_DEFAULT);
@@ -129,23 +128,16 @@ public final class XceiverServerRatis extends XceiverServer {
             100, TimeUnit.SECONDS,
             new ArrayBlockingQueue<>(1024),
             new ThreadPoolExecutor.CallerRunsPolicy());
-    final int numContainerOpExecutors = conf.getInt(
-        OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_KEY,
-        OzoneConfigKeys.DFS_CONTAINER_RATIS_NUM_CONTAINER_OP_EXECUTORS_DEFAULT);
     this.context = context;
     this.replicationLevel =
         conf.getEnum(OzoneConfigKeys.DFS_CONTAINER_RATIS_REPLICATION_LEVEL_KEY,
             OzoneConfigKeys.DFS_CONTAINER_RATIS_REPLICATION_LEVEL_DEFAULT);
-    this.executors = new ArrayList<>();
     cacheEntryExpiryInteval = conf.getTimeDuration(OzoneConfigKeys.
             DFS_CONTAINER_RATIS_STATEMACHINEDATA_CACHE_EXPIRY_INTERVAL,
         OzoneConfigKeys.
             DFS_CONTAINER_RATIS_STATEMACHINEDATA_CACHE_EXPIRY_INTERVAL_DEFAULT,
         TimeUnit.MILLISECONDS);
     this.dispatcher = dispatcher;
-    for (int i = 0; i < numContainerOpExecutors; i++) {
-      executors.add(Executors.newSingleThreadExecutor());
-    }
 
     RaftServer.Builder builder = RaftServer.newBuilder()
         .setServerId(RatisHelper.toRaftPeerId(dd))
@@ -159,22 +151,22 @@ public final class XceiverServerRatis extends XceiverServer {
 
   private ContainerStateMachine getStateMachine(RaftGroupId gid) {
     return new ContainerStateMachine(gid, dispatcher, chunkExecutor, this,
-        Collections.unmodifiableList(executors), cacheEntryExpiryInteval,
-        getSecurityConfig().isBlockTokenEnabled(), getBlockTokenVerifier());
+        cacheEntryExpiryInteval, getSecurityConfig().isBlockTokenEnabled(),
+        getBlockTokenVerifier(), conf);
   }
 
-  private RaftProperties newRaftProperties(Configuration conf) {
+  private RaftProperties newRaftProperties() {
     final RaftProperties properties = new RaftProperties();
 
     // Set rpc type
-    final RpcType rpc = setRpcType(conf, properties);
+    final RpcType rpc = setRpcType(properties);
 
     // set raft segment size
-    setRaftSegmentSize(conf, properties);
+    setRaftSegmentSize(properties);
 
     // set raft segment pre-allocated size
     final int raftSegmentPreallocatedSize =
-        setRaftSegmentPreallocatedSize(conf, properties);
+        setRaftSegmentPreallocatedSize(properties);
 
     // Set max write buffer size, which is the scm chunk size
     final int maxChunkSize = setMaxWriteBuffer(properties);
@@ -196,19 +188,19 @@ public final class XceiverServerRatis extends XceiverServer {
         .setSyncTimeout(properties, dataSyncTimeout);
 
     // Set the server Request timeout
-    setServerRequestTimeout(conf, properties);
+    setServerRequestTimeout(properties);
 
     // set timeout for a retry cache entry
-    setTimeoutForRetryCache(conf, properties);
+    setTimeoutForRetryCache(properties);
 
     // Set the ratis leader election timeout
-    setRatisLeaderElectionTimeout(conf, properties);
+    setRatisLeaderElectionTimeout(properties);
 
     // Set the maximum cache segments
     RaftServerConfigKeys.Log.setMaxCachedSegmentNum(properties, 2);
 
     // set the node failure timeout
-    setNodeFailureTimeout(conf, properties);
+    setNodeFailureTimeout(properties);
 
     // Set the ratis storage directory
     String storageDir = HddsServerUtil.getOzoneDatanodeRatisDirectory(conf);
@@ -266,8 +258,7 @@ public final class XceiverServerRatis extends XceiverServer {
     return properties;
   }
 
-  private void setNodeFailureTimeout(Configuration conf,
-                                     RaftProperties properties) {
+  private void setNodeFailureTimeout(RaftProperties properties) {
     TimeUnit timeUnit;
     long duration;
     timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT
@@ -285,8 +276,7 @@ public final class XceiverServerRatis extends XceiverServer {
     nodeFailureTimeoutMs = nodeFailureTimeout.toLong(TimeUnit.MILLISECONDS);
   }
 
-  private void setRatisLeaderElectionTimeout(Configuration conf,
-                                             RaftProperties properties) {
+  private void setRatisLeaderElectionTimeout(RaftProperties properties) {
     long duration;
     TimeUnit leaderElectionMinTimeoutUnit =
         OzoneConfigKeys.
@@ -307,8 +297,7 @@ public final class XceiverServerRatis extends XceiverServer {
         TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS));
   }
 
-  private void setTimeoutForRetryCache(Configuration conf,
-                                       RaftProperties properties) {
+  private void setTimeoutForRetryCache(RaftProperties properties) {
     TimeUnit timeUnit;
     long duration;
     timeUnit =
@@ -324,8 +313,7 @@ public final class XceiverServerRatis extends XceiverServer {
         .setExpiryTime(properties, retryCacheTimeout);
   }
 
-  private void setServerRequestTimeout(Configuration conf,
-                                       RaftProperties properties) {
+  private void setServerRequestTimeout(RaftProperties properties) {
     TimeUnit timeUnit;
     long duration;
     timeUnit = OzoneConfigKeys.DFS_RATIS_SERVER_REQUEST_TIMEOUT_DURATION_DEFAULT
@@ -347,8 +335,7 @@ public final class XceiverServerRatis extends XceiverServer {
     return maxChunkSize;
   }
 
-  private int setRaftSegmentPreallocatedSize(Configuration conf,
-                                             RaftProperties properties) {
+  private int setRaftSegmentPreallocatedSize(RaftProperties properties) {
     final int raftSegmentPreallocatedSize = (int) conf.getStorageSize(
         OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY,
         OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_PREALLOCATED_SIZE_DEFAULT,
@@ -371,8 +358,7 @@ public final class XceiverServerRatis extends XceiverServer {
     return raftSegmentPreallocatedSize;
   }
 
-  private void setRaftSegmentSize(Configuration conf,
-                                  RaftProperties properties) {
+  private void setRaftSegmentSize(RaftProperties properties) {
     final int raftSegmentSize = (int)conf.getStorageSize(
         OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_SIZE_KEY,
         OzoneConfigKeys.DFS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT,
@@ -381,7 +367,7 @@ public final class XceiverServerRatis extends XceiverServer {
         SizeInBytes.valueOf(raftSegmentSize));
   }
 
-  private RpcType setRpcType(Configuration conf, RaftProperties properties) {
+  private RpcType setRpcType(RaftProperties properties) {
     final String rpcType = conf.get(
         OzoneConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_KEY,
         OzoneConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT);
@@ -447,7 +433,6 @@ public final class XceiverServerRatis extends XceiverServer {
         // some of the tasks would be executed using the executors.
         server.close();
         chunkExecutor.shutdown();
-        executors.forEach(ExecutorService::shutdown);
         isStarted = false;
       } catch (IOException e) {
         throw new RuntimeException(e);

+ 3 - 1
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/SCMCommonPolicy.java

@@ -109,7 +109,9 @@ public abstract class SCMCommonPolicy implements ContainerPlacementPolicy {
       int nodesRequired, final long sizeRequired) throws SCMException {
     List<DatanodeDetails> healthyNodes =
         nodeManager.getNodes(HddsProtos.NodeState.HEALTHY);
-    healthyNodes.removeAll(excludedNodes);
+    if (excludedNodes != null) {
+      healthyNodes.removeAll(excludedNodes);
+    }
     String msg;
     if (healthyNodes.size() == 0) {
       msg = "No healthy node found to allocate container.";

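The guard above avoids a NullPointerException: ArrayList.removeAll rejects a null argument, so calling chooseDatanodes without any excluded nodes would previously crash placement. A minimal reproduction sketch (node names are placeholders):

import java.util.ArrayList;
import java.util.List;

class RemoveAllNullSketch {
  public static void main(String[] args) {
    List<String> healthyNodes = new ArrayList<>();
    healthyNodes.add("dn-1");

    List<String> excludedNodes = null;         // caller passed no exclusions
    // healthyNodes.removeAll(excludedNodes);  // NullPointerException

    if (excludedNodes != null) {               // the guard added above
      healthyNodes.removeAll(excludedNodes);
    }
  }
}
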
+ 17 - 8
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestContainerPlacementFactory.java

@@ -63,6 +63,13 @@ public class TestContainerPlacementFactory {
   public void setup() {
     //initialize network topology instance
     conf = new OzoneConfiguration();
+  }
+
+  @Test
+  public void testRackAwarePolicy() throws IOException {
+    conf.set(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
+        SCMContainerPlacementRackAware.class.getName());
+
     NodeSchema[] schemas = new NodeSchema[]
         {ROOT_SCHEMA, RACK_SCHEMA, LEAF_SCHEMA};
     NodeSchemaManager.getInstance().init(schemas, true);
@@ -91,11 +98,7 @@ public class TestContainerPlacementFactory {
         .thenReturn(new SCMNodeMetric(storageCapacity, 80L, 20L));
     when(nodeManager.getNodeStat(datanodes.get(4)))
         .thenReturn(new SCMNodeMetric(storageCapacity, 70L, 30L));
-  }
-
 
-  @Test
-  public void testDefaultPolicy() throws IOException {
     ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory
         .getPolicy(conf, nodeManager, cluster, true);
 
@@ -111,14 +114,21 @@ public class TestContainerPlacementFactory {
         datanodeDetails.get(2)));
   }
 
+  @Test
+  public void testDefaultPolicy() throws IOException {
+    ContainerPlacementPolicy policy = ContainerPlacementPolicyFactory
+        .getPolicy(conf, null, null, true);
+    Assert.assertSame(SCMContainerPlacementRandom.class, policy.getClass());
+  }
+
   /**
    * A dummy container placement implementation for test.
    */
-  public class DummyImpl implements ContainerPlacementPolicy {
+  public static class DummyImpl implements ContainerPlacementPolicy {
     @Override
     public List<DatanodeDetails> chooseDatanodes(
         List<DatanodeDetails> excludedNodes, List<DatanodeDetails> favoredNodes,
-        int nodesRequired, long sizeRequired) throws IOException {
+        int nodesRequired, long sizeRequired) {
       return null;
     }
   }
@@ -127,8 +137,7 @@ public class TestContainerPlacementFactory {
   public void testConstuctorNotFound() throws SCMException {
     // set a placement class which does't have the right constructor implemented
     conf.set(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
-        "org.apache.hadoop.hdds.scm.container.placement.algorithms." +
-            "TestContainerPlacementFactory$DummyImpl");
+        DummyImpl.class.getName());
     ContainerPlacementPolicyFactory.getPolicy(conf, null, null, true);
   }
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml

@@ -383,7 +383,7 @@
 
   <property>
     <name>dfs.federation.router.namenode.heartbeat.enable</name>
-    <value>true</value>
+    <value></value>
     <description>
       If true, get namenode heartbeats and send into the State Store.
       If not explicitly specified takes the same value as for

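Leaving the value empty makes the default resolvable at run time instead of being pinned to true in the XML: per the (truncated) description, the namenode heartbeat setting falls back to the router-wide heartbeat flag when unset. A hedged sketch of that fallback; the literal key strings and helper are assumptions for illustration, and only RBFConfigKeys.DFS_ROUTER_HEARTBEAT_ENABLE is confirmed by the test diff below:

import org.apache.hadoop.conf.Configuration;

class HeartbeatDefaultSketch {
  // Hypothetical key constants, shown as plain strings for illustration.
  static final String NAMENODE_HEARTBEAT_ENABLE =
      "dfs.federation.router.namenode.heartbeat.enable";
  static final String ROUTER_HEARTBEAT_ENABLE =
      "dfs.federation.router.heartbeat.enable";

  static boolean namenodeHeartbeatEnabled(Configuration conf) {
    // If the namenode-specific key is unset (empty default in the XML),
    // fall back to the router-wide heartbeat flag.
    boolean routerHeartbeat = conf.getBoolean(ROUTER_HEARTBEAT_ENABLE, true);
    return conf.getBoolean(NAMENODE_HEARTBEAT_ENABLE, routerHeartbeat);
  }
}
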
+ 28 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.server.federation.MockResolver;
 import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
@@ -260,4 +261,31 @@ public class TestRouter {
     router.close();
   }
 
+  @Test
+  public void testNamenodeHeartBeatEnableDefault() throws IOException {
+    checkNamenodeHeartBeatEnableDefault(true);
+    checkNamenodeHeartBeatEnableDefault(false);
+  }
+
+  /**
+   * Check the default value of dfs.federation.router.namenode.heartbeat.enable
+   * when it isn't explicitly defined.
+   * @param enable value for dfs.federation.router.heartbeat.enable.
+   */
+  private void checkNamenodeHeartBeatEnableDefault(boolean enable)
+      throws IOException {
+    final Router router = new Router();
+    try {
+      Configuration config = new HdfsConfiguration();
+      config.setBoolean(RBFConfigKeys.DFS_ROUTER_HEARTBEAT_ENABLE, enable);
+      router.init(config);
+      if (enable) {
+        assertNotNull(router.getNamenodeHeartbeatServices());
+      } else {
+        assertNull(router.getNamenodeHeartbeatServices());
+      }
+    } finally {
+      router.close();
+    }
+  }
 }

+ 62 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java

@@ -17,8 +17,14 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.hdfs.server.aliasmap.InMemoryAliasMap;
 import org.apache.hadoop.hdfs.server.common.Util;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
 import static org.apache.hadoop.util.Time.monotonicNow;
 
 import java.net.HttpURLConnection;
@@ -89,6 +95,10 @@ public class ImageServlet extends HttpServlet {
   private SortedSet<ImageUploadRequest> currentlyDownloadingCheckpoints = Collections
       .<ImageUploadRequest> synchronizedSortedSet(new TreeSet<ImageUploadRequest>());
 
+  public static final String RECENT_IMAGE_CHECK_ENABLED =
+      "recent.image.check.enabled";
+  public static final boolean RECENT_IMAGE_CHECK_ENABLED_DEFAULT = true;
+
   @Override
   public void doGet(final HttpServletRequest request,
       final HttpServletResponse response) throws ServletException, IOException {
@@ -507,6 +517,23 @@ public class ImageServlet extends HttpServlet {
       final PutImageParams parsedParams = new PutImageParams(request, response,
           conf);
       final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
+      final boolean checkRecentImageEnable;
+      Object checkRecentImageEnableObj =
+          context.getAttribute(RECENT_IMAGE_CHECK_ENABLED);
+      if (checkRecentImageEnableObj != null) {
+        if (checkRecentImageEnableObj instanceof Boolean) {
+          checkRecentImageEnable = (boolean) checkRecentImageEnableObj;
+        } else {
+          // This is an error case, but crashing NN due to this
+          // seems more undesirable. Only log the error and set to default.
+          LOG.error("Expecting boolean obj for setting checking recent image, "
+              + "but got " + checkRecentImageEnableObj.getClass() + ". This is "
+              + "unexpected! Setting to default.");
+          checkRecentImageEnable = RECENT_IMAGE_CHECK_ENABLED_DEFAULT;
+        }
+      } else {
+        checkRecentImageEnable = RECENT_IMAGE_CHECK_ENABLED_DEFAULT;
+      }
 
       validateRequest(context, conf, request, response, nnImage,
           parsedParams.getStorageInfoString());
@@ -520,7 +547,8 @@ public class ImageServlet extends HttpServlet {
               // target (regardless of the fact that we got the image)
               HAServiceProtocol.HAServiceState state = NameNodeHttpServer
                   .getNameNodeStateFromContext(getServletContext());
-              if (state != HAServiceProtocol.HAServiceState.ACTIVE) {
+              if (state != HAServiceProtocol.HAServiceState.ACTIVE &&
+                  state != HAServiceProtocol.HAServiceState.OBSERVER) {
                 // we need a different response type here so the client can differentiate this
                 // from the failure to upload due to (1) security, or (2) other checkpoints already
                 // present
@@ -554,6 +582,39 @@ public class ImageServlet extends HttpServlet {
                         + txid);
                 return null;
               }
+
+              long now = System.currentTimeMillis();
+              long lastCheckpointTime =
+                  nnImage.getStorage().getMostRecentCheckpointTime();
+              long lastCheckpointTxid =
+                  nnImage.getStorage().getMostRecentCheckpointTxId();
+
+              long checkpointPeriod =
+                  conf.getTimeDuration(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY,
+                      DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT, TimeUnit.SECONDS);
+              long checkpointTxnCount =
+                  conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+                      DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
+
+              long timeDelta = TimeUnit.MILLISECONDS.toSeconds(
+                  now - lastCheckpointTime);
+
+              if (checkRecentImageEnable &&
+                  timeDelta < checkpointPeriod &&
+                  txid - lastCheckpointTxid < checkpointTxnCount) {
+                // only when at least one of two conditions are met we accept
+                // a new fsImage
+                // 1. most recent image's txid is too far behind
+                // 2. last checkpoint time was too old
+                response.sendError(HttpServletResponse.SC_CONFLICT,
+                    "Most recent checkpoint is neither too far behind in "
+                        + "txid, nor too old. New txnid cnt is "
+                        + (txid - lastCheckpointTxid)
+                        + ", expecting at least " + checkpointTxnCount
+                        + " unless too long since last upload.");
+                return null;
+              }
+
               try {
                 if (nnImage.getStorage().findImageFile(nnf, txid) != null) {
                   response.sendError(HttpServletResponse.SC_CONFLICT,

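Distilled, the new check above rejects an uploaded fsimage only when it is both recent in wall-clock time and close in transaction count to the last checkpoint; meeting either threshold is enough for the upload to be accepted. A small sketch of that predicate (method and parameter names are illustrative, not from ImageServlet):

import java.util.concurrent.TimeUnit;

class RecentImageCheckSketch {
  // Returns true if the uploaded image should be rejected because the last
  // checkpoint is neither too old nor too far behind in txids.
  static boolean rejectRecentImage(boolean checkEnabled,
      long nowMillis, long lastCheckpointTimeMillis,
      long uploadTxid, long lastCheckpointTxid,
      long checkpointPeriodSeconds, long checkpointTxnCount) {
    long secondsSinceLastCheckpoint =
        TimeUnit.MILLISECONDS.toSeconds(nowMillis - lastCheckpointTimeMillis);
    return checkEnabled
        && secondsSinceLastCheckpoint < checkpointPeriodSeconds
        && uploadTxid - lastCheckpointTxid < checkpointTxnCount;
  }
}
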
+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -72,6 +72,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.http.HttpServer2;
 import org.apache.hadoop.ipc.ExternalCall;
 import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
 import org.apache.hadoop.ipc.RetriableException;
@@ -440,6 +441,11 @@ public class NameNode extends ReconfigurableBase implements
     return rpcServer;
   }
 
+  @VisibleForTesting
+  public HttpServer2 getHttpServer() {
+    return httpServer.getHttpServer();
+  }
+
   public void queueExternalCall(ExternalCall<?> extCall)
       throws IOException, InterruptedException {
     if (rpcServer == null) {

+ 92 - 54
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java

@@ -19,14 +19,16 @@ package org.apache.hadoop.hdfs.server.namenode.ha;
 
 import static org.apache.hadoop.util.Time.monotonicNow;
 
+import com.google.common.collect.Lists;
 import java.io.IOException;
 import java.net.URI;
 import java.net.URL;
 import java.security.PrivilegedAction;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.*;
-import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -67,7 +69,6 @@ public class StandbyCheckpointer {
   private final Configuration conf;
   private final FSNamesystem namesystem;
   private long lastCheckpointTime;
-  private long lastUploadTime;
   private final CheckpointerThread thread;
   private final ThreadFactory uploadThreadFactory;
   private List<URL> activeNNAddresses;
@@ -75,11 +76,13 @@ public class StandbyCheckpointer {
 
   private final Object cancelLock = new Object();
   private Canceler canceler;
-  private boolean isPrimaryCheckPointer = true;
 
   // Keep track of how many checkpoints were canceled.
   // This is for use in tests.
   private static int canceledCount = 0;
+
+  // A map from NN url to the most recent image upload time.
+  private final HashMap<String, CheckpointReceiverEntry> checkpointReceivers;
   
   public StandbyCheckpointer(Configuration conf, FSNamesystem ns)
       throws IOException {
@@ -89,8 +92,38 @@ public class StandbyCheckpointer {
     this.thread = new CheckpointerThread();
     this.uploadThreadFactory = new ThreadFactoryBuilder().setDaemon(true)
         .setNameFormat("TransferFsImageUpload-%d").build();
-
     setNameNodeAddresses(conf);
+    this.checkpointReceivers = new HashMap<>();
+    for (URL address : activeNNAddresses) {
+      this.checkpointReceivers.put(address.toString(),
+          new CheckpointReceiverEntry());
+    }
+  }
+
+  private static final class CheckpointReceiverEntry {
+    private long lastUploadTime;
+    private boolean isPrimary;
+
+    CheckpointReceiverEntry() {
+      this.lastUploadTime = 0L;
+      this.isPrimary = true;
+    }
+
+    void setLastUploadTime(long lastUploadTime) {
+      this.lastUploadTime = lastUploadTime;
+    }
+
+    void setIsPrimary(boolean isPrimaryFor) {
+      this.isPrimary = isPrimaryFor;
+    }
+
+    long getLastUploadTime() {
+      return lastUploadTime;
+    }
+
+    boolean isPrimary() {
+      return isPrimary;
+    }
   }
 
   /**
@@ -158,7 +191,7 @@ public class StandbyCheckpointer {
     thread.interrupt();
   }
 
-  private void doCheckpoint(boolean sendCheckpoint) throws InterruptedException, IOException {
+  private void doCheckpoint() throws InterruptedException, IOException {
     assert canceler != null;
     final long txid;
     final NameNodeFile imageType;
@@ -210,11 +243,6 @@ public class StandbyCheckpointer {
       namesystem.cpUnlock();
     }
 
-    //early exit if we shouldn't actually send the checkpoint to the ANN
-    if(!sendCheckpoint){
-      return;
-    }
-
     // Upload the saved checkpoint back to the active
     // Do this in a separate thread to avoid blocking transition to active, but don't allow more
     // than the expected number of tasks to run or queue up
@@ -224,56 +252,70 @@ public class StandbyCheckpointer {
         uploadThreadFactory);
     // for right now, just match the upload to the nn address by convention. There is no need to
     // directly tie them together by adding a pair class.
-    List<Future<TransferFsImage.TransferResult>> uploads =
-        new ArrayList<Future<TransferFsImage.TransferResult>>();
+    HashMap<String, Future<TransferFsImage.TransferResult>> uploads =
+        new HashMap<>();
     for (final URL activeNNAddress : activeNNAddresses) {
-      Future<TransferFsImage.TransferResult> upload =
-          executor.submit(new Callable<TransferFsImage.TransferResult>() {
-            @Override
-            public TransferFsImage.TransferResult call()
-                throws IOException, InterruptedException {
-              CheckpointFaultInjector.getInstance().duringUploadInProgess();
-              return TransferFsImage.uploadImageFromStorage(activeNNAddress, conf, namesystem
-                  .getFSImage().getStorage(), imageType, txid, canceler);
-            }
-          });
-      uploads.add(upload);
+      // Upload the image if at least one of the following conditions is met:
+      // 1. it has been quiet for long enough, so try to contact the node.
+      // 2. this standby IS the primary checkpointer of the target NN.
+      String addressString = activeNNAddress.toString();
+      assert checkpointReceivers.containsKey(addressString);
+      CheckpointReceiverEntry receiverEntry =
+          checkpointReceivers.get(addressString);
+      long secsSinceLastUpload =
+          TimeUnit.MILLISECONDS.toSeconds(
+              monotonicNow() - receiverEntry.getLastUploadTime());
+      boolean shouldUpload = receiverEntry.isPrimary() ||
+          secsSinceLastUpload >= checkpointConf.getQuietPeriod();
+      if (shouldUpload) {
+        Future<TransferFsImage.TransferResult> upload =
+            executor.submit(new Callable<TransferFsImage.TransferResult>() {
+              @Override
+              public TransferFsImage.TransferResult call()
+                  throws IOException, InterruptedException {
+                CheckpointFaultInjector.getInstance().duringUploadInProgess();
+                return TransferFsImage.uploadImageFromStorage(activeNNAddress,
+                    conf, namesystem.getFSImage().getStorage(), imageType, txid,
+                    canceler);
+              }
+            });
+        uploads.put(addressString, upload);
+      }
     }
     InterruptedException ie = null;
-    IOException ioe= null;
-    int i = 0;
-    boolean success = false;
-    for (; i < uploads.size(); i++) {
-      Future<TransferFsImage.TransferResult> upload = uploads.get(i);
+    List<IOException> ioes = Lists.newArrayList();
+    for (Map.Entry<String, Future<TransferFsImage.TransferResult>> entry :
+        uploads.entrySet()) {
+      String url = entry.getKey();
+      Future<TransferFsImage.TransferResult> upload = entry.getValue();
       try {
-        // TODO should there be some smarts here about retries nodes that are not the active NN?
+        // TODO should there be some smarts here about retrying nodes that
+        //  are not the active NN?
+        CheckpointReceiverEntry receiverEntry = checkpointReceivers.get(url);
         if (upload.get() == TransferFsImage.TransferResult.SUCCESS) {
-          success = true;
-          //avoid getting the rest of the results - we don't care since we had a successful upload
-          break;
+          receiverEntry.setLastUploadTime(monotonicNow());
+          receiverEntry.setIsPrimary(true);
+        } else {
+          receiverEntry.setIsPrimary(false);
         }
-
       } catch (ExecutionException e) {
-        ioe = new IOException("Exception during image upload", e);
-        break;
+        // Even if an exception happens, still proceed to the next NN url,
+        // so that a failed upload to one NN does not prevent the remaining
+        // NNs from getting the fsImage.
+        ioes.add(new IOException("Exception during image upload", e));
       } catch (InterruptedException e) {
         ie = e;
         break;
       }
     }
-    if (ie == null && ioe == null) {
-      //Update only when response from remote about success or
-      lastUploadTime = monotonicNow();
-      // we are primary if we successfully updated the ANN
-      this.isPrimaryCheckPointer = success;
-    }
     // cleaner than copying code for multiple catch statements and better than catching all
     // exceptions, so we just handle the ones we expect.
-    if (ie != null || ioe != null) {
+    if (ie != null) {
 
       // cancel the rest of the tasks, and close the pool
-      for (; i < uploads.size(); i++) {
-        Future<TransferFsImage.TransferResult> upload = uploads.get(i);
+      for (Map.Entry<String, Future<TransferFsImage.TransferResult>> entry :
+          uploads.entrySet()) {
+        Future<TransferFsImage.TransferResult> upload = entry.getValue();
         // The background thread may be blocked waiting in the throttler, so
         // interrupt it.
         upload.cancel(true);
@@ -286,11 +328,11 @@ public class StandbyCheckpointer {
       executor.awaitTermination(500, TimeUnit.MILLISECONDS);
 
       // re-throw the exception we got, since one of these two must be non-null
-      if (ie != null) {
-        throw ie;
-      } else if (ioe != null) {
-        throw ioe;
-      }
+      throw ie;
+    }
+
+    if (!ioes.isEmpty()) {
+      throw MultipleIOException.createIOException(ioes);
     }
   }
   
@@ -373,7 +415,6 @@ public class StandbyCheckpointer {
       // Reset checkpoint time so that we don't always checkpoint
       // on startup.
       lastCheckpointTime = monotonicNow();
-      lastUploadTime = monotonicNow();
       while (shouldRun) {
         boolean needRollbackCheckpoint = namesystem.isNeedRollbackFsImage();
         if (!needRollbackCheckpoint) {
@@ -426,10 +467,7 @@ public class StandbyCheckpointer {
 
             // on all nodes, we build the checkpoint. However, we only ship the checkpoint if have a
             // rollback request, are the checkpointer, are outside the quiet period.
-            final long secsSinceLastUpload = (now - lastUploadTime) / 1000;
-            boolean sendRequest = isPrimaryCheckPointer
-                || secsSinceLastUpload >= checkpointConf.getQuietPeriod();
-            doCheckpoint(sendRequest);
+            doCheckpoint();
 
             // reset needRollbackCheckpoint to false only when we finish a ckpt
             // for rollback image

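The StandbyCheckpointer change above replaces the single isPrimaryCheckPointer/lastUploadTime pair with one entry per receiving NameNode, and collects upload failures into a MultipleIOException instead of stopping at the first one. A minimal sketch of the per-receiver upload decision, using an invented type standing in for CheckpointReceiverEntry (not the class introduced above):

// Sketch only: one upload decision per target NN, mirroring the loop above.
// quietPeriodSecs plays the role of checkpointConf.getQuietPeriod().
final class CheckpointReceiverSketch {
  long lastUploadTimeMs;    // updated only after a successful upload
  boolean isPrimary = true; // cleared when an upload to that NN does not succeed

  boolean shouldUpload(long nowMs, long quietPeriodSecs) {
    long secsSinceLastUpload = (nowMs - lastUploadTimeMs) / 1000;
    // Ship the image if this standby is still the primary checkpointer for
    // that NN, or if that NN has gone a full quiet period without an image.
    return isPrimary || secsSinceLastUpload >= quietPeriodSecs;
  }
}
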
+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java

@@ -240,10 +240,10 @@ public class DFSAdmin extends FsShell {
         "\t\tThe storage type specific quota is cleared when -storageType " +
         "option is specified.\n" +
         "\t\tAvailable storageTypes are \n" +
-        "\t\t- RAM_DISK\n" +
         "\t\t- DISK\n" +
         "\t\t- SSD\n" +
-        "\t\t- ARCHIVE";
+        "\t\t- ARCHIVE\n" +
+        "\t\t- PROVIDED";
 
 
     private StorageType type;
@@ -304,10 +304,10 @@ public class DFSAdmin extends FsShell {
         "\t\t3. the directory does not exist or is a file.\n" +
         "\t\tThe storage type specific quota is set when -storageType option is specified.\n" +
         "\t\tAvailable storageTypes are \n" +
-        "\t\t- RAM_DISK\n" +
         "\t\t- DISK\n" +
         "\t\t- SSD\n" +
-        "\t\t- ARCHIVE";
+        "\t\t- ARCHIVE\n" +
+        "\t\t- PROVIDED";
 
     private long quota; // the quota to be set
     private StorageType type;

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsQuotaAdminGuide.md

@@ -96,7 +96,7 @@ Quotas are managed by a set of commands available only to the administrator.
     integer, the directory does not exist or it is a file, or the
     directory would immediately exceed the new quota. The storage type
     specific quota is set when -storageType option is specified. Available
-    storageTypes are RAM_DISK,DISK,SSD,ARCHIVE.
+    storageTypes are DISK,SSD,ARCHIVE,PROVIDED.
 
 *   `hdfs dfsadmin -clrSpaceQuota -storageType <storagetype> <directory>...<directory>`
 
@@ -104,7 +104,7 @@ Quotas are managed by a set of commands available only to the administrator.
     for each directory, with faults reported if the directory does not exist or
     it is a file. It is not a fault if the directory has no storage type quota on
     for storage type specified. The storage type specific quota is cleared when -storageType
-    option is specified. Available storageTypes are RAM_DISK,DISK,SSD,ARCHIVE.
+    option is specified. Available storageTypes are DISK,SSD,ARCHIVE,PROVIDED.
 
 Reporting Command
 -----------------

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -80,6 +80,7 @@ import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.Multimap;
 import org.apache.hadoop.hdfs.server.common.blockaliasmap.BlockAliasMap;
 import org.apache.hadoop.hdfs.server.common.blockaliasmap.impl.InMemoryLevelDBAliasMapClient;
+import org.apache.hadoop.hdfs.server.namenode.ImageServlet;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.security.ssl.KeyStoreTestUtil;
 import org.slf4j.Logger;
@@ -984,6 +985,8 @@ public class MiniDFSCluster implements AutoCloseable {
         }
        copyKeys(conf, nnConf, nnInfo.nameserviceId, nnInfo.nnId);
       }
+      nn.nameNode.getHttpServer()
+          .setAttribute(ImageServlet.RECENT_IMAGE_CHECK_ENABLED, false);
     }
   }
 
@@ -2199,6 +2202,8 @@ public class MiniDFSCluster implements AutoCloseable {
     }
 
     NameNode nn = NameNode.createNameNode(args, info.conf);
+    nn.getHttpServer()
+        .setAttribute(ImageServlet.RECENT_IMAGE_CHECK_ENABLED, false);
     info.nameNode = nn;
     info.setStartOpt(startOpt);
     if (waitActive) {

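MiniDFSCluster turns the new check off for every NameNode it starts by setting an HttpServer2 attribute keyed by ImageServlet.RECENT_IMAGE_CHECK_ENABLED. A small sketch of how the servlet side could resolve that flag, assuming the attribute surfaces on the ServletContext and defaults to enabled; the exact lookup inside ImageServlet may differ:

import javax.servlet.ServletContext;
// assumes org.apache.hadoop.hdfs.server.namenode.ImageServlet is on the classpath

// Sketch only: resolve the test override, defaulting to the check being on.
final class ImageCheckFlagSketch {
  static boolean isRecentImageCheckEnabled(ServletContext context) {
    Object override =
        context.getAttribute(ImageServlet.RECENT_IMAGE_CHECK_ENABLED);
    return override instanceof Boolean ? (Boolean) override : true;
  }
}
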
+ 55 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
 import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
+import static org.apache.hadoop.hdfs.server.namenode.ImageServlet.RECENT_IMAGE_CHECK_ENABLED;
 import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
 import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
@@ -2462,6 +2463,60 @@ public class TestCheckpoint {
     }
   }
 
+  @Test(timeout = 300000)
+  public void testActiveRejectSmallerDeltaImage() throws Exception {
+    MiniDFSCluster cluster = null;
+    Configuration conf = new HdfsConfiguration();
+    // Set the delta txid threshold to 10
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 10);
+    // Set the delta time threshold to some arbitrarily large value, so
+    // it does not trigger a checkpoint during this test.
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 900000);
+
+    SecondaryNameNode secondary = null;
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+          .format(true).build();
+      // enable small delta rejection
+      NameNode active = cluster.getNameNode();
+      active.httpServer.getHttpServer()
+          .setAttribute(RECENT_IMAGE_CHECK_ENABLED, true);
+
+      secondary = startSecondaryNameNode(conf);
+
+      FileSystem fs = cluster.getFileSystem();
+      assertEquals(0, active.getNamesystem().getFSImage()
+          .getMostRecentCheckpointTxId());
+
+      // create 5 dir.
+      for (int i = 0; i < 5; i++) {
+        fs.mkdirs(new Path("dir-" + i));
+      }
+
+      // Checkpoint 1st
+      secondary.doCheckpoint();
+      // at this point, the txid delta is smaller than threshold 10.
+      // active does not accept this image.
+      assertEquals(0, active.getNamesystem().getFSImage()
+          .getMostRecentCheckpointTxId());
+
+      // create another 10 dir.
+      for (int i = 0; i < 10; i++) {
+        fs.mkdirs(new Path("dir2-" + i));
+      }
+
+      // Checkpoint 2nd
+      secondary.doCheckpoint();
+      // here the delta is large enough and active accepts this image.
+      assertEquals(21, active.getNamesystem().getFSImage()
+          .getMostRecentCheckpointTxId());
+    } finally {
+      cleanup(secondary);
+      cleanup(cluster);
+    }
+  }
+
   private static void cleanup(SecondaryNameNode snn) {
     if (snn != null) {
       try {

+ 34 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java

@@ -253,7 +253,40 @@ public class TestStandbyCheckpoints {
     dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 1));
     FSImageTestUtil.assertParallelFilesAreIdentical(dirs, ImmutableSet.<String>of());
   }
-  
+
+  /**
+   * Test that, when there are observer NameNodes, the Standby node is
+   * able to upload the fsImage to the Observer nodes as well.
+   */
+  @Test(timeout = 300000)
+  public void testStandbyAndObserverState() throws Exception {
+    // Transition 2 to observer
+    cluster.transitionToObserver(2);
+    doEdits(0, 10);
+    // After a rollEditLog, the Standby (nn1)'s next checkpoint will be
+    // ahead of the observer (nn2).
+    nns[0].getRpcServer().rollEditLog();
+
+    // After the standby creates a checkpoint, it tries to push the image to
+    // the active and all observers, updating its own txid to the most recent.
+    HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
+    HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
+    HATestUtil.waitForCheckpoint(cluster, 2, ImmutableList.of(12));
+
+    assertEquals(12, nns[2].getNamesystem().getFSImage()
+        .getMostRecentCheckpointTxId());
+    assertEquals(12, nns[1].getNamesystem().getFSImage()
+        .getMostRecentCheckpointTxId());
+
+    List<File> dirs = Lists.newArrayList();
+    // observer and standby both have this same image.
+    dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 2));
+    dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 1));
+    FSImageTestUtil.assertParallelFilesAreIdentical(dirs, ImmutableSet.of());
+    // Restore 2 back to standby
+    cluster.transitionToStandby(2);
+  }
+
   /**
    * Test for the case when the SBN is configured to checkpoint based
    * on a time period, but no transactions are happening on the

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml

@@ -16106,10 +16106,6 @@
           <type>RegexpComparator</type>
           <expected-output>^( |\t)*Available storageTypes are( )*</expected-output>
         </comparator>
-        <comparator>
-          <type>RegexpComparator</type>
-          <expected-output>^( |\t)*- RAM_DISK( )*</expected-output>
-        </comparator>
         <comparator>
           <type>RegexpComparator</type>
           <expected-output>^( |\t)*- DISK( )*</expected-output>
@@ -16122,6 +16118,10 @@
           <type>RegexpComparator</type>
           <expected-output>^( |\t)*- ARCHIVE( )*</expected-output>
         </comparator>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>^( |\t)*- PROVIDED( )*</expected-output>
+        </comparator>
       </comparators>
     </test>
 

+ 3 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java

@@ -119,7 +119,9 @@ public class MiniMRYarnCluster extends MiniYARNCluster {
   @Override
   public void serviceInit(Configuration conf) throws Exception {
     conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
-    if (conf.get(MRJobConfig.MR_AM_STAGING_DIR) == null) {
+    String stagingDir = conf.get(MRJobConfig.MR_AM_STAGING_DIR);
+    if (stagingDir == null ||
+        stagingDir.equals(MRJobConfig.DEFAULT_MR_AM_STAGING_DIR)) {
       conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(),
           "apps_staging_dir/").getAbsolutePath());
     }

+ 3 - 0
hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/audit/OMAction.java

@@ -39,6 +39,9 @@ public enum OMAction implements AuditAction {
   UPDATE_KEY,
   PURGE_KEYS,
 
+  // S3 Bucket
+  CREATE_S3_BUCKET,
+
   // READ Actions
   CHECK_VOLUME_ACCESS,
   LIST_BUCKETS,

+ 2 - 0
hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/exceptions/OMException.java

@@ -203,5 +203,7 @@ public class OMException extends IOException {
 
     PREFIX_NOT_FOUND,
 
+    S3_BUCKET_INVALID_LENGTH
+
   }
 }

+ 0 - 49
hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmDeleteVolumeResponse.java

@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.ozone.om.helpers;
-
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeList;
-
-/**
- * OM response for delete volume request for a ozone volume.
- */
-public  class OmDeleteVolumeResponse {
-  private String volume;
-  private String owner;
-  private VolumeList updatedVolumeList;
-
-  public OmDeleteVolumeResponse(String volume, String owner,
-      VolumeList updatedVolumeList) {
-    this.volume = volume;
-    this.owner = owner;
-    this.updatedVolumeList = updatedVolumeList;
-  }
-
-  public String getVolume() {
-    return volume;
-  }
-
-  public String getOwner() {
-    return owner;
-  }
-
-  public VolumeList getUpdatedVolumeList() {
-    return updatedVolumeList;
-  }
-}

+ 0 - 56
hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmVolumeOwnerChangeResponse.java

@@ -1,56 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership.  The ASF
- * licenses this file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-package org.apache.hadoop.ozone.om.helpers;
-
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeList;
-
-/**
- * OM response for owner change request for a ozone volume.
- */
-public class OmVolumeOwnerChangeResponse {
-  private VolumeList originalOwnerVolumeList;
-  private VolumeList newOwnerVolumeList;
-  private OmVolumeArgs newOwnerVolumeArgs;
-  private String originalOwner;
-
-  public OmVolumeOwnerChangeResponse(VolumeList originalOwnerVolumeList,
-      VolumeList newOwnerVolumeList, OmVolumeArgs newOwnerVolumeArgs,
-      String originalOwner) {
-    this.originalOwnerVolumeList = originalOwnerVolumeList;
-    this.newOwnerVolumeList = newOwnerVolumeList;
-    this.newOwnerVolumeArgs = newOwnerVolumeArgs;
-    this.originalOwner = originalOwner;
-  }
-
-  public String getOriginalOwner() {
-    return originalOwner;
-  }
-
-  public VolumeList getOriginalOwnerVolumeList() {
-    return originalOwnerVolumeList;
-  }
-
-  public VolumeList getNewOwnerVolumeList() {
-    return newOwnerVolumeList;
-  }
-
-  public OmVolumeArgs getNewOwnerVolumeArgs() {
-    return newOwnerVolumeArgs;
-  }
-}

+ 0 - 77
hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerHAProtocol.java

@@ -18,13 +18,8 @@
 
 package org.apache.hadoop.ozone.om.protocol;
 
-import org.apache.hadoop.ozone.om.helpers.OmDeleteVolumeResponse;
 import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
 import org.apache.hadoop.ozone.om.helpers.OmMultipartInfo;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeOwnerChangeResponse;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeList;
 
 import java.io.IOException;
 
@@ -54,76 +49,4 @@ public interface OzoneManagerHAProtocol {
   OmMultipartInfo applyInitiateMultipartUpload(OmKeyArgs omKeyArgs,
       String multipartUploadID) throws IOException;
 
-  /**
-   * Start Create Volume Transaction.
-   * @param omVolumeArgs
-   * @return VolumeList
-   * @throws IOException
-   */
-  VolumeList startCreateVolume(OmVolumeArgs omVolumeArgs) throws IOException;
-
-  /**
-   * Apply Create Volume changes to OM DB.
-   * @param omVolumeArgs
-   * @param volumeList
-   * @throws IOException
-   */
-  void applyCreateVolume(OmVolumeArgs omVolumeArgs,
-      VolumeList volumeList) throws IOException;
-
-  /**
-   * Start setOwner Transaction.
-   * @param volume
-   * @param owner
-   * @return OmVolumeOwnerChangeResponse
-   * @throws IOException
-   */
-  OmVolumeOwnerChangeResponse startSetOwner(String volume,
-      String owner) throws IOException;
-
-  /**
-   * Apply Set Quota changes to OM DB.
-   * @param oldOwner
-   * @param oldOwnerVolumeList
-   * @param newOwnerVolumeList
-   * @param newOwnerVolumeArgs
-   * @throws IOException
-   */
-  void applySetOwner(String oldOwner, VolumeList oldOwnerVolumeList,
-      VolumeList newOwnerVolumeList, OmVolumeArgs newOwnerVolumeArgs)
-      throws IOException;
-
-  /**
-   * Start Set Quota Transaction.
-   * @param volume
-   * @param quota
-   * @return OmVolumeArgs
-   * @throws IOException
-   */
-  OmVolumeArgs startSetQuota(String volume, long quota) throws IOException;
-
-  /**
-   * Apply Set Quota Changes to OM DB.
-   * @param omVolumeArgs
-   * @throws IOException
-   */
-  void applySetQuota(OmVolumeArgs omVolumeArgs) throws IOException;
-
-  /**
-   * Start Delete Volume Transaction.
-   * @param volume
-   * @return OmDeleteVolumeResponse
-   * @throws IOException
-   */
-  OmDeleteVolumeResponse startDeleteVolume(String volume) throws IOException;
-
-  /**
-   * Apply Delete Volume changes to OM DB.
-   * @param volume
-   * @param owner
-   * @param newVolumeList
-   * @throws IOException
-   */
-  void applyDeleteVolume(String volume, String owner,
-      VolumeList newVolumeList) throws IOException;
 }

+ 13 - 7
hadoop-ozone/common/src/main/proto/OzoneManagerProtocol.proto

@@ -279,6 +279,8 @@ enum Status {
     PERMISSION_DENIED = 48;
     TIMEOUT = 49;
     PREFIX_NOT_FOUND=50;
+
+    S3_BUCKET_INVALID_LENGTH = 51; // s3 bucket invalid length.
 }
 
 
@@ -307,7 +309,6 @@ message UserInfo {
 */
 message CreateVolumeRequest {
     required VolumeInfo volumeInfo = 1;
-    optional VolumeList volumeList = 2;
 }
 
 message CreateVolumeResponse {
@@ -325,10 +326,6 @@ message SetVolumePropertyRequest {
     required string volumeName = 1;
     optional string ownerName = 2;
     optional uint64 quotaInBytes = 3;
-    optional string originalOwner = 4;
-    optional VolumeList oldOwnerVolumeList = 5;
-    optional VolumeList newOwnerVolumeList = 6;
-    optional VolumeInfo volumeInfo = 7;
 }
 
 message SetVolumePropertyResponse {
@@ -365,8 +362,6 @@ message InfoVolumeResponse {
 */
 message DeleteVolumeRequest {
     required string volumeName = 1;
-    optional string owner = 2;
-    optional VolumeList volumeList = 3;
 }
 
 message DeleteVolumeResponse {
@@ -867,6 +862,17 @@ message ServiceInfo {
 message S3CreateBucketRequest {
     required string userName = 1;
     required string s3bucketname = 2;
+    // This will be set during OM HA by one of the OM nodes. In the future,
+    // if more data fields are required to create the volume/bucket, we can
+    // add them here; that is the reason for creating a new message type.
+    // S3CreateBucket means: create a volume from userName and create a
+    // bucket with s3BucketName.
+    optional S3CreateVolumeInfo s3CreateVolumeInfo = 3;
+}
+
+message S3CreateVolumeInfo {
+    // Creation time set in preExecute on one of the OM nodes.
+    required uint64 creationTime = 1;
 }
 
 message S3CreateBucketResponse {

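The new S3CreateVolumeInfo message carries a creation time that is chosen once, in preExecute, before the request is replicated to the other OMs. A rough, hypothetical sketch of why (all names invented, not the OM implementation): under OM HA every replica replays the same request through Ratis, so any value read from the local clock at apply time would differ between replicas and must be fixed before replication.

// Hypothetical sketch, invented names; it only illustrates the timing rule.
final class S3CreateBucketHaSketch {
  /** Runs once, on the OM handling the client call (analogous to preExecute). */
  static long stampCreationTime() {
    return System.currentTimeMillis(); // fixed here, then replicated via Ratis
  }

  /** Runs on every OM replica; uses the replicated value, never a local clock. */
  static void apply(String userName, String s3BucketName, long creationTime) {
    // create the backing volume and the bucket with the shared creationTime
  }
}
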
+ 156 - 0
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/Test2WayCommitInRatis.java

@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.ozone.client.rpc;
+
+import org.apache.hadoop.conf.StorageUnit;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.XceiverClientManager;
+import org.apache.hadoop.hdds.scm.XceiverClientRatis;
+import org.apache.hadoop.hdds.scm.XceiverClientReply;
+import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.container.ContainerTestHelper;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT;
+
+/**
+ * This class tests the 2 way commit in Ratis.
+ */
+public class Test2WayCommitInRatis {
+
+  private MiniOzoneCluster cluster;
+  private OzoneClient client;
+  private ObjectStore objectStore;
+  private String volumeName;
+  private String bucketName;
+  private int chunkSize;
+  private int flushSize;
+  private int maxFlushSize;
+  private int blockSize;
+  private StorageContainerLocationProtocolClientSideTranslatorPB
+      storageContainerLocationClient;
+  private static String containerOwner = "OZONE";
+
+  /**
+   * Create a MiniOzoneCluster for testing.
+   * <p>
+   * Ozone is made active by setting OZONE_ENABLED = true
+   *
+   * @throws IOException
+   */
+  private void startCluster(OzoneConfiguration conf) throws Exception {
+    chunkSize = 100;
+    flushSize = 2 * chunkSize;
+    maxFlushSize = 2 * flushSize;
+    blockSize = 2 * maxFlushSize;
+
+    conf.setTimeDuration(HDDS_SCM_WATCHER_TIMEOUT, 1000, TimeUnit.MILLISECONDS);
+    conf.setTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY,
+        1, TimeUnit.SECONDS);
+
+    conf.setQuietMode(false);
+    cluster = MiniOzoneCluster.newBuilder(conf)
+        .setNumDatanodes(7)
+        .setBlockSize(blockSize)
+        .setChunkSize(chunkSize)
+        .setStreamBufferFlushSize(flushSize)
+        .setStreamBufferMaxSize(maxFlushSize)
+        .setStreamBufferSizeUnit(StorageUnit.BYTES)
+        .build();
+    cluster.waitForClusterToBeReady();
+    //the easiest way to create an open container is creating a key
+    client = OzoneClientFactory.getClient(conf);
+    objectStore = client.getObjectStore();
+    volumeName = "watchforcommithandlingtest";
+    bucketName = volumeName;
+    objectStore.createVolume(volumeName);
+    objectStore.getVolume(volumeName).createBucket(bucketName);
+    storageContainerLocationClient = cluster
+        .getStorageContainerLocationClient();
+  }
+
+
+  /**
+   * Shutdown MiniDFSCluster.
+   */
+  private void shutdown() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+
+  @Test
+  public void test2WayCommitForRetryfailure() throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 20,
+        TimeUnit.SECONDS);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 20);
+    startCluster(conf);
+    GenericTestUtils.LogCapturer logCapturer =
+        GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
+    XceiverClientManager clientManager = new XceiverClientManager(conf);
+
+    ContainerWithPipeline container1 = storageContainerLocationClient
+        .allocateContainer(HddsProtos.ReplicationType.RATIS,
+            HddsProtos.ReplicationFactor.THREE, containerOwner);
+    XceiverClientSpi xceiverClient = clientManager
+        .acquireClient(container1.getPipeline());
+    Assert.assertEquals(1, xceiverClient.getRefcount());
+    Assert.assertEquals(container1.getPipeline(),
+        xceiverClient.getPipeline());
+    Pipeline pipeline = xceiverClient.getPipeline();
+    XceiverClientRatis ratisClient = (XceiverClientRatis) xceiverClient;
+    XceiverClientReply reply = xceiverClient.sendCommandAsync(
+        ContainerTestHelper.getCreateContainerRequest(
+            container1.getContainerInfo().getContainerID(),
+            xceiverClient.getPipeline()));
+    reply.getResponse().get();
+    Assert.assertEquals(3, ratisClient.getCommitInfoMap().size());
+    cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
+    reply = xceiverClient.sendCommandAsync(ContainerTestHelper
+        .getCloseContainer(pipeline,
+            container1.getContainerInfo().getContainerID()));
+    reply.getResponse().get();
+    xceiverClient.watchForCommit(reply.getLogIndex(), 20000);
+
+    // commitInfo Map will be reduced to 2 here
+    Assert.assertEquals(2, ratisClient.getCommitInfoMap().size());
+    clientManager.releaseClient(xceiverClient, false);
+    Assert.assertTrue(logCapturer.getOutput().contains("3 way commit failed"));
+    Assert
+        .assertTrue(logCapturer.getOutput().contains("Committed by majority"));
+    logCapturer.stopCapturing();
+    shutdown();
+  }
+}

+ 8 - 44
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestFailureHandlingByClient.java

@@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.client.ReplicationType;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
@@ -81,11 +82,16 @@ public class TestFailureHandlingByClient {
     conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 5,
         TimeUnit.SECONDS);
     conf.setTimeDuration(HDDS_SCM_WATCHER_TIMEOUT, 1000, TimeUnit.MILLISECONDS);
-    conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 3, TimeUnit.SECONDS);
-    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 5);
+    conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 100, TimeUnit.SECONDS);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 10);
     conf.setTimeDuration(
         OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY,
         1, TimeUnit.SECONDS);
+    conf.setTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
+        1, TimeUnit.SECONDS);
+    conf.setBoolean(
+        ScmConfigKeys.DFS_NETWORK_TOPOLOGY_AWARE_READ_ENABLED, false);
 
     conf.setQuietMode(false);
     cluster = MiniOzoneCluster.newBuilder(conf)
@@ -156,48 +162,6 @@ public class TestFailureHandlingByClient {
     shutdown();
   }
 
-  @Test
-  public void testMultiBlockWritesWithDnFailures() throws Exception {
-    startCluster();
-    String keyName = "ratis3";
-    OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, 0);
-    String data =
-        ContainerTestHelper
-        .getFixedLengthString(keyString, blockSize + chunkSize);
-    key.write(data.getBytes());
-
-    // get the name of a valid container
-    Assert.assertTrue(key.getOutputStream() instanceof KeyOutputStream);
-    KeyOutputStream groupOutputStream =
-        (KeyOutputStream) key.getOutputStream();
-    List<OmKeyLocationInfo> locationInfoList =
-        groupOutputStream.getLocationInfoList();
-    Assert.assertTrue(locationInfoList.size() == 2);
-    long containerId = locationInfoList.get(1).getContainerID();
-    ContainerInfo container = cluster.getStorageContainerManager()
-        .getContainerManager()
-        .getContainer(ContainerID.valueof(containerId));
-    Pipeline pipeline =
-        cluster.getStorageContainerManager().getPipelineManager()
-            .getPipeline(container.getPipelineID());
-    List<DatanodeDetails> datanodes = pipeline.getNodes();
-    cluster.shutdownHddsDatanode(datanodes.get(0));
-    cluster.shutdownHddsDatanode(datanodes.get(1));
-
-    // The write will fail but exception will be handled and length will be
-    // updated correctly in OzoneManager once the steam is closed
-    key.write(data.getBytes());
-    key.close();
-    OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName)
-        .setBucketName(bucketName).setType(HddsProtos.ReplicationType.RATIS)
-        .setFactor(HddsProtos.ReplicationFactor.THREE).setKeyName(keyName)
-        .setRefreshPipeline(true)
-        .build();
-    OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs);
-    Assert.assertEquals(2 * data.getBytes().length, keyInfo.getDataSize());
-    validateData(keyName, data.concat(data).getBytes());
-    shutdown();
-  }
 
   @Test
   public void testMultiBlockWritesWithIntermittentDnFailures()

+ 168 - 0
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestMultiBlockWritesWithDnFailures.java

@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.ozone.client.rpc;
+
+import org.apache.hadoop.hdds.client.ReplicationType;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.client.ObjectStore;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
+import org.apache.hadoop.ozone.client.io.KeyOutputStream;
+import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
+import org.apache.hadoop.ozone.container.ContainerTestHelper;
+import org.apache.hadoop.ozone.om.helpers.OmKeyArgs;
+import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
+import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_SCM_WATCHER_TIMEOUT;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
+
+/**
+ * Tests MultiBlock Writes with Dn failures by Ozone Client.
+ */
+public class TestMultiBlockWritesWithDnFailures {
+
+  private MiniOzoneCluster cluster;
+  private OzoneConfiguration conf;
+  private OzoneClient client;
+  private ObjectStore objectStore;
+  private int chunkSize;
+  private int blockSize;
+  private String volumeName;
+  private String bucketName;
+  private String keyString;
+  private int maxRetries;
+
+  /**
+   * Create a MiniOzoneCluster for testing.
+   * <p>
+   * Ozone is made active by setting OZONE_ENABLED = true
+   *
+   * @throws IOException
+   */
+  private void init() throws Exception {
+    conf = new OzoneConfiguration();
+    maxRetries = 100;
+    chunkSize = (int) OzoneConsts.MB;
+    blockSize = 4 * chunkSize;
+    conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 5,
+        TimeUnit.SECONDS);
+    conf.setTimeDuration(HDDS_SCM_WATCHER_TIMEOUT, 1000, TimeUnit.MILLISECONDS);
+    conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 100, TimeUnit.SECONDS);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 5);
+    conf.setTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_RETRY_INTERVAL_KEY,
+        1, TimeUnit.SECONDS);
+
+    conf.setQuietMode(false);
+    cluster = MiniOzoneCluster.newBuilder(conf)
+        .setNumDatanodes(6).build();
+    cluster.waitForClusterToBeReady();
+    //the easiest way to create an open container is creating a key
+    client = OzoneClientFactory.getClient(conf);
+    objectStore = client.getObjectStore();
+    keyString = UUID.randomUUID().toString();
+    volumeName = "datanodefailurehandlingtest";
+    bucketName = volumeName;
+    objectStore.createVolume(volumeName);
+    objectStore.getVolume(volumeName).createBucket(bucketName);
+  }
+
+  private void startCluster() throws Exception {
+    init();
+  }
+
+  /**
+   * Shutdown the MiniOzoneCluster.
+   */
+  private void shutdown() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testMultiBlockWritesWithDnFailures() throws Exception {
+    startCluster();
+    String keyName = "ratis3";
+    OzoneOutputStream key = createKey(keyName, ReplicationType.RATIS, 0);
+    String data =
+        ContainerTestHelper
+            .getFixedLengthString(keyString, blockSize + chunkSize);
+    key.write(data.getBytes());
+
+    // get the name of a valid container
+    Assert.assertTrue(key.getOutputStream() instanceof KeyOutputStream);
+    KeyOutputStream groupOutputStream =
+        (KeyOutputStream) key.getOutputStream();
+    List<OmKeyLocationInfo> locationInfoList =
+        groupOutputStream.getLocationInfoList();
+    Assert.assertTrue(locationInfoList.size() == 2);
+    long containerId = locationInfoList.get(1).getContainerID();
+    ContainerInfo container = cluster.getStorageContainerManager()
+        .getContainerManager()
+        .getContainer(ContainerID.valueof(containerId));
+    Pipeline pipeline =
+        cluster.getStorageContainerManager().getPipelineManager()
+            .getPipeline(container.getPipelineID());
+    List<DatanodeDetails> datanodes = pipeline.getNodes();
+    cluster.shutdownHddsDatanode(datanodes.get(0));
+    cluster.shutdownHddsDatanode(datanodes.get(1));
+
+    // The write will fail, but the exception will be handled and the length
+    // will be updated correctly in OzoneManager once the stream is closed
+    key.write(data.getBytes());
+    key.close();
+    OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName)
+        .setBucketName(bucketName).setType(HddsProtos.ReplicationType.RATIS)
+        .setFactor(HddsProtos.ReplicationFactor.THREE).setKeyName(keyName)
+        .setRefreshPipeline(true)
+        .build();
+    OmKeyInfo keyInfo = cluster.getOzoneManager().lookupKey(keyArgs);
+    Assert.assertEquals(2 * data.getBytes().length, keyInfo.getDataSize());
+    validateData(keyName, data.concat(data).getBytes());
+    shutdown();
+  }
+
+  private OzoneOutputStream createKey(String keyName, ReplicationType type,
+      long size) throws Exception {
+    return ContainerTestHelper
+        .createKey(keyName, type, size, objectStore, volumeName, bucketName);
+  }
+
+  private void validateData(String keyName, byte[] data) throws Exception {
+    ContainerTestHelper
+        .validateData(keyName, data, objectStore, volumeName, bucketName);
+  }
+
+}

+ 31 - 69
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/client/rpc/TestWatchForCommit.java

@@ -46,7 +46,9 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Random;
 import java.util.UUID;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
@@ -135,7 +137,10 @@ public class TestWatchForCommit {
     OzoneConfiguration conf = new OzoneConfiguration();
     conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 20,
         TimeUnit.SECONDS);
-    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 5);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 20);
+    conf.setTimeDuration(
+        OzoneConfigKeys.DFS_RATIS_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
+        1, TimeUnit.SECONDS);
     startCluster(conf);
     XceiverClientMetrics metrics =
         XceiverClientManager.getXceiverClientMetrics();
@@ -178,31 +183,24 @@ public class TestWatchForCommit {
         .getOutputStream();
     Assert.assertTrue(stream instanceof BlockOutputStream);
     BlockOutputStream blockOutputStream = (BlockOutputStream) stream;
-
     // we have just written data more than flush Size(2 chunks), at this time
     // buffer pool will have 3 buffers allocated worth of chunk size
-
     Assert.assertEquals(4, blockOutputStream.getBufferPool().getSize());
     // writtenDataLength as well flushedDataLength will be updated here
     Assert.assertEquals(dataLength, blockOutputStream.getWrittenDataLength());
-
     Assert.assertEquals(maxFlushSize,
         blockOutputStream.getTotalDataFlushedLength());
-
     // since data equals to maxBufferSize is written, this will be a blocking
     // call and hence will wait for atleast flushSize worth of data to get
     // acked by all servers right here
     Assert.assertTrue(blockOutputStream.getTotalAckDataLength() >= flushSize);
-
     // watchForCommit will clean up atleast one entry from the map where each
     // entry corresponds to flushSize worth of data
     Assert.assertTrue(
         blockOutputStream.getCommitIndex2flushedDataMap().size() <= 1);
-
     // Now do a flush. This will flush the data and update the flush length and
     // the map.
     key.flush();
-
     Assert.assertEquals(pendingWriteChunkCount,
         metrics.getContainerOpsMetrics(ContainerProtos.Type.WriteChunk));
     Assert.assertEquals(pendingPutBlockCount,
@@ -213,19 +211,15 @@ public class TestWatchForCommit {
         metrics.getContainerOpCountMetrics(ContainerProtos.Type.PutBlock));
     Assert.assertEquals(totalOpCount + 8,
         metrics.getTotalOpCount());
-
     // Since the data in the buffer is already flushed, flush here will have
     // no impact on the counters and data structures
-
     Assert.assertEquals(4, blockOutputStream.getBufferPool().getSize());
     Assert.assertEquals(dataLength, blockOutputStream.getWrittenDataLength());
-
     Assert.assertEquals(dataLength,
         blockOutputStream.getTotalDataFlushedLength());
     // flush will make sure one more entry gets updated in the map
     Assert.assertTrue(
         blockOutputStream.getCommitIndex2flushedDataMap().size() <= 2);
-
     XceiverClientRatis raftClient =
         (XceiverClientRatis) blockOutputStream.getXceiverClient();
     Assert.assertEquals(3, raftClient.getCommitInfoMap().size());
@@ -235,11 +229,9 @@ public class TestWatchForCommit {
     // again write data with more than max buffer limit. This will call
     // watchForCommit again. Since the commit will happen 2 way, the
     // commitInfoMap will get updated for servers which are alive
-
     // 4 writeChunks = maxFlushSize + 2 putBlocks  will be discarded here
     // once exception is hit
     key.write(data1);
-
     // As a part of handling the exception, 4 failed writeChunks  will be
     // rewritten plus one partial chunk plus two putBlocks for flushSize
     // and one flush for partial chunk
@@ -282,7 +274,7 @@ public class TestWatchForCommit {
     OzoneConfiguration conf = new OzoneConfiguration();
     conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 3,
         TimeUnit.SECONDS);
-    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 10);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 20);
     startCluster(conf);
     XceiverClientManager clientManager = new XceiverClientManager(conf);
     ContainerWithPipeline container1 = storageContainerLocationClient
@@ -303,8 +295,11 @@ public class TestWatchForCommit {
     cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
     cluster.shutdownHddsDatanode(pipeline.getNodes().get(1));
     try {
-      // just watch for a lo index which in not updated in the commitInfo Map
-      xceiverClient.watchForCommit(index + 1, 3000);
+      // just watch for a log index which is not updated in the commitInfo Map
+      // and for which no logIndex has been generated in Ratis.
+      // The basic idea here is just to test whether it throws an exception.
+      xceiverClient
+          .watchForCommit(index + new Random().nextInt(100) + 10, 3000);
       Assert.fail("expected exception not thrown");
     } catch (Exception e) {
       Assert.assertTrue(
@@ -321,7 +316,7 @@ public class TestWatchForCommit {
     OzoneConfiguration conf = new OzoneConfiguration();
     conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT,
         100, TimeUnit.SECONDS);
-    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 10);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 20);
     startCluster(conf);
     XceiverClientManager clientManager = new XceiverClientManager(conf);
     ContainerWithPipeline container1 = storageContainerLocationClient
@@ -343,67 +338,30 @@ public class TestWatchForCommit {
     cluster.shutdownHddsDatanode(pipeline.getNodes().get(1));
     // again write data with more than max buffer limit. This wi
     try {
-      // just watch for a lo index which in not updated in the commitInfo Map
-      xceiverClient.watchForCommit(index + 1, 20000);
+      // just watch for a log index which is not updated in the commitInfo Map
+      // and for which no logIndex has been generated in Ratis.
+      // The basic idea here is just to test whether it throws an exception.
+      xceiverClient
+          .watchForCommit(index + new Random().nextInt(100) + 10, 20000);
       Assert.fail("expected exception not thrown");
     } catch (Exception e) {
-      Assert.assertTrue(HddsClientUtils
-          .checkForException(e) instanceof RaftRetryFailureException);
+      Assert.assertTrue(e instanceof ExecutionException);
+      // since the timeout value is quite long, the watch request will fail
+      // with either a NotReplicatedException, a RetryFailureException or a
+      // RuntimeException
+      Assert.assertFalse(HddsClientUtils
+          .checkForException(e) instanceof TimeoutException);
     }
     clientManager.releaseClient(xceiverClient, false);
     shutdown();
   }
 
-  @Test
-  public void test2WayCommitForRetryfailure() throws Exception {
-    OzoneConfiguration conf = new OzoneConfiguration();
-    conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 20,
-        TimeUnit.SECONDS);
-    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 8);
-    startCluster(conf);
-    GenericTestUtils.LogCapturer logCapturer =
-        GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
-    XceiverClientManager clientManager = new XceiverClientManager(conf);
-
-    ContainerWithPipeline container1 = storageContainerLocationClient
-        .allocateContainer(HddsProtos.ReplicationType.RATIS,
-            HddsProtos.ReplicationFactor.THREE, containerOwner);
-    XceiverClientSpi xceiverClient = clientManager
-        .acquireClient(container1.getPipeline());
-    Assert.assertEquals(1, xceiverClient.getRefcount());
-    Assert.assertEquals(container1.getPipeline(),
-        xceiverClient.getPipeline());
-    Pipeline pipeline = xceiverClient.getPipeline();
-    XceiverClientRatis ratisClient = (XceiverClientRatis) xceiverClient;
-    XceiverClientReply reply = xceiverClient.sendCommandAsync(
-        ContainerTestHelper.getCreateContainerRequest(
-            container1.getContainerInfo().getContainerID(),
-            xceiverClient.getPipeline()));
-    reply.getResponse().get();
-    Assert.assertEquals(3, ratisClient.getCommitInfoMap().size());
-    cluster.shutdownHddsDatanode(pipeline.getNodes().get(0));
-    reply = xceiverClient.sendCommandAsync(ContainerTestHelper
-        .getCloseContainer(pipeline,
-            container1.getContainerInfo().getContainerID()));
-    reply.getResponse().get();
-    xceiverClient.watchForCommit(reply.getLogIndex(), 20000);
-
-    // commitInfo Map will be reduced to 2 here
-    Assert.assertEquals(2, ratisClient.getCommitInfoMap().size());
-    clientManager.releaseClient(xceiverClient, false);
-    Assert.assertTrue(logCapturer.getOutput().contains("3 way commit failed"));
-    Assert
-        .assertTrue(logCapturer.getOutput().contains("Committed by majority"));
-    logCapturer.stopCapturing();
-    shutdown();
-  }
-
   @Test
   public void test2WayCommitForTimeoutException() throws Exception {
     OzoneConfiguration conf = new OzoneConfiguration();
     conf.setTimeDuration(OzoneConfigKeys.OZONE_CLIENT_WATCH_REQUEST_TIMEOUT, 3,
         TimeUnit.SECONDS);
-    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 10);
+    conf.setInt(OzoneConfigKeys.DFS_RATIS_CLIENT_REQUEST_MAX_RETRIES_KEY, 20);
     startCluster(conf);
     GenericTestUtils.LogCapturer logCapturer =
         GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
@@ -477,8 +435,12 @@ public class TestWatchForCommit {
     pipelineList.add(pipeline);
     ContainerTestHelper.waitForPipelineClose(pipelineList, cluster);
     try {
-      // just watch for a lo index which in not updated in the commitInfo Map
-      xceiverClient.watchForCommit(reply.getLogIndex() + 1, 20000);
+      // just watch for a log index which is not updated in the commitInfo Map
+      // and for which no logIndex has been generated in Ratis.
+      // The basic idea here is just to test whether it throws an exception.
+      xceiverClient
+          .watchForCommit(reply.getLogIndex() + new Random().nextInt(100) + 10,
+              20000);
       Assert.fail("Expected exception not thrown");
     } catch(Exception e) {
       Assert.assertTrue(HddsClientUtils

+ 3 - 3
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java

@@ -87,11 +87,11 @@ public class TestOmMetrics {
             ozoneManager, "volumeManager");
     VolumeManager mockVm = Mockito.spy(volumeManager);
 
-    Mockito.doReturn(null).when(mockVm).createVolume(null);
-    Mockito.doReturn(null).when(mockVm).deleteVolume(null);
+    Mockito.doNothing().when(mockVm).createVolume(null);
+    Mockito.doNothing().when(mockVm).deleteVolume(null);
     Mockito.doReturn(null).when(mockVm).getVolumeInfo(null);
     Mockito.doReturn(true).when(mockVm).checkVolumeAccess(null, null);
-    Mockito.doReturn(null).when(mockVm).setOwner(null, null);
+    Mockito.doNothing().when(mockVm).setOwner(null, null);
     Mockito.doReturn(null).when(mockVm).listVolumes(null, null, null, 0);
 
     HddsWhiteboxTestUtils.setInternalState(

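TestOmMetrics switches from doReturn(null) to doNothing() because the stubbed VolumeManager methods now return void. A self-contained illustration of that Mockito rule, using a made-up Service class rather than the real VolumeManager:

import org.mockito.Mockito;

public class MockitoVoidStubSketch {
  static class Service {                          // invented, for illustration
    void create(String name) { }                  // void method
    String info(String name) { return "real"; }   // value-returning method
  }

  public static void main(String[] args) {
    Service svc = Mockito.spy(new Service());
    // A void method cannot be stubbed with doReturn(null); doNothing() is needed.
    Mockito.doNothing().when(svc).create(null);
    // A value-returning method can still be stubbed with doReturn(...).
    Mockito.doReturn(null).when(svc).info(null);
  }
}
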
+ 0 - 3
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/BucketManagerImpl.java

@@ -65,8 +65,6 @@ public class BucketManagerImpl implements BucketManager {
   private final OMMetadataManager metadataManager;
   private final KeyProviderCryptoExtension kmsProvider;
 
-  private final boolean isRatisEnabled;
-
   /**
    * Constructs BucketManager.
    *
@@ -85,7 +83,6 @@ public class BucketManagerImpl implements BucketManager {
       KeyProviderCryptoExtension kmsProvider, boolean isRatisEnabled) {
     this.metadataManager = metadataManager;
     this.kmsProvider = kmsProvider;
-    this.isRatisEnabled = isRatisEnabled;
   }
 
   KeyProviderCryptoExtension getKMSProvider() {

+ 24 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java

@@ -113,17 +113,24 @@ public class OMMetrics {
 
   private @Metric MutableCounterLong numVolumes;
   private @Metric MutableCounterLong numBuckets;
+  private @Metric MutableCounterLong numS3Buckets;
 
   //TODO: This metric is an estimate and it may be inaccurate on restart if the
   // OM process was not shutdown cleanly. Key creations/deletions in the last
   // few minutes before restart may not be included in this count.
   private @Metric MutableCounterLong numKeys;
 
+
+
   // Metrics to track checkpointing statistics from last run.
   private @Metric MutableGaugeLong lastCheckpointCreationTimeTaken;
   private @Metric MutableGaugeLong lastCheckpointTarOperationTimeTaken;
   private @Metric MutableGaugeLong lastCheckpointStreamingTimeTaken;
 
+  private @Metric MutableCounterLong numS3BucketCreates;
+  private @Metric MutableCounterLong numS3BucketCreateFails;
+
+
   public OMMetrics() {
   }
 
@@ -134,6 +141,23 @@ public class OMMetrics {
         new OMMetrics());
   }
 
+  public void incNumS3BucketCreates() {
+    numBucketOps.incr();
+    numS3BucketCreates.incr();
+  }
+
+  public void incNumS3BucketCreateFails() {
+    numS3BucketCreateFails.incr();
+  }
+
+  public void incNumS3Buckets() {
+    numS3Buckets.incr();
+  }
+
+  public void decNumS3Buckets() {
+    numS3Buckets.incr(-1);
+  }
+
   public void incNumVolumes() {
     numVolumes.incr();
   }

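The new S3 bucket counters are driven from the create path: incNumS3BucketCreates() on every attempt, incNumS3Buckets() (plus incNumBuckets() for the underlying ozone bucket) on success, and incNumS3BucketCreateFails() on failure, mirroring how S3BucketCreateRequest later in this diff uses them. A minimal sketch of that call pattern (the handleCreate method and its arguments are illustrative only):

    import java.io.IOException;

    import org.apache.hadoop.ozone.om.OMMetrics;

    /** Sketch only: intended metric call pattern around an S3 bucket create. */
    public class S3CreateMetricsSketch {
      static void handleCreate(OMMetrics omMetrics, boolean succeeds) {
        omMetrics.incNumS3BucketCreates();            // counted on every attempt
        try {
          if (!succeeds) {
            throw new IOException("create failed");   // stand-in for a real failure
          }
          omMetrics.incNumBuckets();                  // underlying ozone bucket
          omMetrics.incNumS3Buckets();                // s3 bucket mapping created
        } catch (IOException e) {
          omMetrics.incNumS3BucketCreateFails();      // counted only on failure
        }
      }
    }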
+ 5 - 2
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java

@@ -69,6 +69,7 @@ import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
 import org.apache.hadoop.utils.db.TypedTable;
 import org.apache.hadoop.utils.db.cache.CacheKey;
 import org.apache.hadoop.utils.db.cache.CacheValue;
+import org.apache.hadoop.utils.db.cache.TableCacheImpl;
 import org.eclipse.jetty.util.StringUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -269,11 +270,13 @@ public class OmMetadataManagerImpl implements OMMetadataManager {
         this.store.getTable(USER_TABLE, String.class, VolumeList.class);
     checkTableStatus(userTable, USER_TABLE);
     volumeTable =
-        this.store.getTable(VOLUME_TABLE, String.class, OmVolumeArgs.class);
+        this.store.getTable(VOLUME_TABLE, String.class, OmVolumeArgs.class,
+            TableCacheImpl.CacheCleanupPolicy.NEVER);
     checkTableStatus(volumeTable, VOLUME_TABLE);
 
     bucketTable =
-        this.store.getTable(BUCKET_TABLE, String.class, OmBucketInfo.class);
+        this.store.getTable(BUCKET_TABLE, String.class, OmBucketInfo.class,
+            TableCacheImpl.CacheCleanupPolicy.NEVER);
 
     checkTableStatus(bucketTable, BUCKET_TABLE);
 

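The volume and bucket tables are now opened with TableCacheImpl.CacheCleanupPolicy.NEVER, i.e. their caches are kept and cleanupCache() calls do not evict entries, while other tables keep the existing three-argument getTable overload. A minimal sketch of opening a typed table with an explicit cleanup policy, using the four-argument DBStore#getTable overload added in this commit (the table name and store variable are illustrative):

    import java.io.IOException;

    import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
    import org.apache.hadoop.utils.db.DBStore;
    import org.apache.hadoop.utils.db.Table;
    import org.apache.hadoop.utils.db.cache.TableCacheImpl;

    /** Sketch only: open a typed table with an explicit cache cleanup policy. */
    public class VolumeTableSketch {
      static Table<String, OmVolumeArgs> openVolumeTable(DBStore store)
          throws IOException {
        // NEVER keeps the table cache as a full cache; cleanupCache() calls
        // from the double buffer do not evict its entries.
        return store.getTable("volumeTable", String.class, OmVolumeArgs.class,
            TableCacheImpl.CacheCleanupPolicy.NEVER);
      }
    }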
+ 0 - 77
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java

@@ -77,15 +77,11 @@ import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.OzoneIllegalArgumentException;
 import org.apache.hadoop.ozone.OzoneSecurityUtil;
 import org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider;
-import org.apache.hadoop.ozone.om.helpers.OmDeleteVolumeResponse;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeOwnerChangeResponse;
 import org.apache.hadoop.ozone.om.helpers.S3SecretValue;
 import org.apache.hadoop.ozone.om.protocol.OzoneManagerServerProtocol;
 import org.apache.hadoop.ozone.om.snapshot.OzoneManagerSnapshotProvider;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .KeyArgs;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeList;
 import org.apache.hadoop.ozone.security.OzoneSecurityException;
 import org.apache.hadoop.ozone.security.OzoneTokenIdentifier;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
@@ -1721,79 +1717,6 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
     }
   }
 
-  @Override
-  public VolumeList startCreateVolume(OmVolumeArgs args) throws IOException {
-    // TODO: Need to add metrics and Audit log for HA requests
-    if(isAclEnabled) {
-      checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.CREATE,
-          args.getVolume(), null, null);
-    }
-    VolumeList volumeList = volumeManager.createVolume(args);
-    return volumeList;
-  }
-
-  @Override
-  public void applyCreateVolume(OmVolumeArgs omVolumeArgs,
-      VolumeList volumeList) throws IOException {
-    // TODO: Need to add metrics and Audit log for HA requests
-    volumeManager.applyCreateVolume(omVolumeArgs, volumeList);
-  }
-
-  @Override
-  public OmVolumeOwnerChangeResponse startSetOwner(String volume,
-      String owner) throws IOException {
-    // TODO: Need to add metrics and Audit log for HA requests
-    if (isAclEnabled) {
-      checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.WRITE_ACL, volume,
-          null, null);
-    }
-    return volumeManager.setOwner(volume, owner);
-  }
-
-  @Override
-  public void applySetOwner(String oldOwner, VolumeList oldOwnerVolumeList,
-      VolumeList newOwnerVolumeList, OmVolumeArgs newOwnerVolumeArgs)
-      throws IOException {
-    // TODO: Need to add metrics and Audit log for HA requests
-    volumeManager.applySetOwner(oldOwner, oldOwnerVolumeList,
-        newOwnerVolumeList, newOwnerVolumeArgs);
-  }
-
-  @Override
-  public OmVolumeArgs startSetQuota(String volume, long quota)
-      throws IOException {
-    // TODO: Need to add metrics and Audit log for HA requests
-    if (isAclEnabled) {
-      checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.WRITE_ACL, volume,
-          null, null);
-    }
-    return volumeManager.setQuota(volume, quota);
-  }
-
-  @Override
-  public void applySetQuota(OmVolumeArgs omVolumeArgs) throws IOException {
-      // TODO: Need to add metrics and Audit log for HA requests
-    volumeManager.applySetQuota(omVolumeArgs);
-  }
-
-  @Override
-  public OmDeleteVolumeResponse startDeleteVolume(String volume)
-      throws IOException {
-    if(isAclEnabled) {
-      checkAcls(ResourceType.VOLUME, StoreType.OZONE, ACLType.DELETE, volume,
-          null, null);
-    }
-    // TODO: Need to add metrics and Audit log for HA requests
-    return volumeManager.deleteVolume(volume);
-  }
-
-  @Override
-  public void applyDeleteVolume(String volume, String owner,
-      VolumeList newVolumeList) throws IOException {
-    // TODO: Need to add metrics and Audit log for HA requests
-    volumeManager.applyDeleteVolume(volume, owner, newVolumeList);
-  }
-
   /**
    * Checks if current caller has acl permissions.
    *

+ 3 - 10
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/S3BucketManagerImpl.java

@@ -55,7 +55,6 @@ public class S3BucketManagerImpl implements S3BucketManager {
   private final OMMetadataManager omMetadataManager;
   private final VolumeManager volumeManager;
   private final BucketManager bucketManager;
-  private final boolean isRatisEnabled;
 
   /**
    * Construct an S3 Bucket Manager Object.
@@ -72,9 +71,6 @@ public class S3BucketManagerImpl implements S3BucketManager {
     this.omMetadataManager = omMetadataManager;
     this.volumeManager = volumeManager;
     this.bucketManager = bucketManager;
-    isRatisEnabled = configuration.getBoolean(
-        OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY,
-        OMConfigKeys.OZONE_OM_RATIS_ENABLE_DEFAULT);
   }
 
   @Override
@@ -176,12 +172,9 @@ public class S3BucketManagerImpl implements S3BucketManager {
       }
 
       OmVolumeArgs args = builder.build();
-      if (isRatisEnabled) {
-        // When ratis is enabled we need to call apply also.
-        volumeManager.applyCreateVolume(args, volumeManager.createVolume(args));
-      } else {
-        volumeManager.createVolume(args);
-      }
+
+      volumeManager.createVolume(args);
+
     } catch (OMException exp) {
       newVolumeCreate = false;
       if (exp.getResult().compareTo(VOLUME_ALREADY_EXISTS) == 0) {

+ 4 - 46
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManager.java

@@ -16,13 +16,9 @@
  */
 package org.apache.hadoop.ozone.om;
 
-import org.apache.hadoop.ozone.om.helpers.OmDeleteVolumeResponse;
 import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeOwnerChangeResponse;
 import org.apache.hadoop.ozone.protocol.proto
     .OzoneManagerProtocolProtos.OzoneAclInfo;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeList;
 
 import java.io.IOException;
 import java.util.List;
@@ -36,18 +32,9 @@ public interface VolumeManager extends IOzoneAcl {
    * Create a new volume.
    * @param args - Volume args to create a volume
    */
-  VolumeList createVolume(OmVolumeArgs args)
+  void createVolume(OmVolumeArgs args)
       throws IOException;
 
-  /**
-   * Apply Create Volume changes to OM DB.
-   * @param omVolumeArgs
-   * @param volumeList
-   * @throws IOException
-   */
-  void applyCreateVolume(OmVolumeArgs omVolumeArgs,
-      VolumeList volumeList) throws IOException;
-
   /**
    * Changes the owner of a volume.
    *
@@ -55,19 +42,7 @@ public interface VolumeManager extends IOzoneAcl {
    * @param owner - Name of the owner.
    * @throws IOException
    */
-  OmVolumeOwnerChangeResponse setOwner(String volume, String owner)
-      throws IOException;
-
-  /**
-   * Apply Set Owner changes to OM DB.
-   * @param oldOwner
-   * @param oldOwnerVolumeList
-   * @param newOwnerVolumeList
-   * @param newOwnerVolumeArgs
-   * @throws IOException
-   */
-  void applySetOwner(String oldOwner, VolumeList oldOwnerVolumeList,
-      VolumeList newOwnerVolumeList, OmVolumeArgs newOwnerVolumeArgs)
+  void setOwner(String volume, String owner)
       throws IOException;
 
   /**
@@ -77,14 +52,7 @@ public interface VolumeManager extends IOzoneAcl {
    * @param quota - Quota in bytes.
    * @throws IOException
    */
-  OmVolumeArgs setQuota(String volume, long quota) throws IOException;
-
-  /**
-   * Apply Set Quota changes to OM DB.
-   * @param omVolumeArgs
-   * @throws IOException
-   */
-  void applySetQuota(OmVolumeArgs omVolumeArgs) throws IOException;
+  void setQuota(String volume, long quota) throws IOException;
 
   /**
    * Gets the volume information.
@@ -100,17 +68,7 @@ public interface VolumeManager extends IOzoneAcl {
    * @param volume - Name of the volume.
    * @throws IOException
    */
-  OmDeleteVolumeResponse deleteVolume(String volume) throws IOException;
-
-  /**
-   * Apply Delete Volume changes to OM DB.
-   * @param volume
-   * @param owner
-   * @param newVolumeList
-   * @throws IOException
-   */
-  void applyDeleteVolume(String volume, String owner,
-      VolumeList newVolumeList) throws IOException;
+  void deleteVolume(String volume) throws IOException;
 
   /**
    * Checks if the specified user with a role can access this volume.

+ 13 - 89
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/VolumeManagerImpl.java

@@ -27,9 +27,7 @@ import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ozone.OzoneAcl;
 import org.apache.hadoop.ozone.OzoneConfigKeys;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
-import org.apache.hadoop.ozone.om.helpers.OmDeleteVolumeResponse;
 import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeOwnerChangeResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OzoneAclInfo;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.VolumeList;
 import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer;
@@ -57,9 +55,9 @@ public class VolumeManagerImpl implements VolumeManager {
 
   private final OMMetadataManager metadataManager;
   private final int maxUserVolumeCount;
-  private final boolean isRatisEnabled;
   private final boolean aclEnabled;
 
+
   /**
    * Constructor.
    * @param conf - Ozone configuration.
@@ -70,9 +68,6 @@ public class VolumeManagerImpl implements VolumeManager {
     this.metadataManager = metadataManager;
     this.maxUserVolumeCount = conf.getInt(OZONE_OM_USER_MAX_VOLUME,
         OZONE_OM_USER_MAX_VOLUME_DEFAULT);
-    isRatisEnabled = conf.getBoolean(
-        OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY,
-        OMConfigKeys.OZONE_OM_RATIS_ENABLE_DEFAULT);
     aclEnabled = conf.getBoolean(OzoneConfigKeys.OZONE_ACL_ENABLED,
         OzoneConfigKeys.OZONE_ACL_ENABLED_DEFAULT);
   }
@@ -125,10 +120,9 @@ public class VolumeManagerImpl implements VolumeManager {
   /**
    * Creates a volume.
    * @param omVolumeArgs - OmVolumeArgs.
-   * @return VolumeList
    */
   @Override
-  public VolumeList createVolume(OmVolumeArgs omVolumeArgs) throws IOException {
+  public void createVolume(OmVolumeArgs omVolumeArgs) throws IOException {
     Preconditions.checkNotNull(omVolumeArgs);
 
     boolean acquiredUserLock = false;
@@ -156,13 +150,12 @@ public class VolumeManagerImpl implements VolumeManager {
       // Set creation time
       omVolumeArgs.setCreationTime(System.currentTimeMillis());
 
-      if (!isRatisEnabled) {
-        createVolumeCommitToDB(omVolumeArgs, volumeList, dbVolumeKey,
+
+      createVolumeCommitToDB(omVolumeArgs, volumeList, dbVolumeKey,
             dbUserKey);
-      }
+
       LOG.debug("created volume:{} user:{}", omVolumeArgs.getVolume(),
           omVolumeArgs.getOwnerName());
-      return volumeList;
     } catch (IOException ex) {
       if (!(ex instanceof OMException)) {
         LOG.error("Volume creation failed for user:{} volume:{}",
@@ -179,22 +172,6 @@ public class VolumeManagerImpl implements VolumeManager {
     }
   }
 
-
-  @Override
-  public void applyCreateVolume(OmVolumeArgs omVolumeArgs,
-      VolumeList volumeList) throws IOException {
-    // Do we need to hold lock in apply Transactions requests?
-    String dbVolumeKey = metadataManager.getVolumeKey(omVolumeArgs.getVolume());
-    String dbUserKey = metadataManager.getUserKey(omVolumeArgs.getOwnerName());
-    try {
-      createVolumeCommitToDB(omVolumeArgs, volumeList, dbVolumeKey, dbUserKey);
-    } catch (IOException ex) {
-      LOG.error("Volume creation failed for user:{} volume:{}",
-          omVolumeArgs.getOwnerName(), omVolumeArgs.getVolume(), ex);
-      throw ex;
-    }
-  }
-
   private void createVolumeCommitToDB(OmVolumeArgs omVolumeArgs,
       VolumeList volumeList, String dbVolumeKey, String dbUserKey)
       throws IOException {
@@ -220,7 +197,7 @@ public class VolumeManagerImpl implements VolumeManager {
    * @throws IOException
    */
   @Override
-  public OmVolumeOwnerChangeResponse setOwner(String volume, String owner)
+  public void setOwner(String volume, String owner)
       throws IOException {
     Preconditions.checkNotNull(volume);
     Preconditions.checkNotNull(owner);
@@ -252,12 +229,8 @@ public class VolumeManagerImpl implements VolumeManager {
       VolumeList newOwnerVolumeList = addVolumeToOwnerList(volume, newOwner);
 
       volumeArgs.setOwnerName(owner);
-      if (!isRatisEnabled) {
-        setOwnerCommitToDB(oldOwnerVolumeList, newOwnerVolumeList,
-            volumeArgs, owner);
-      }
-      return new OmVolumeOwnerChangeResponse(oldOwnerVolumeList,
-          newOwnerVolumeList, volumeArgs, originalOwner);
+      setOwnerCommitToDB(oldOwnerVolumeList, newOwnerVolumeList,
+          volumeArgs, owner);
     } catch (IOException ex) {
       if (!(ex instanceof OMException)) {
         LOG.error("Changing volume ownership failed for user:{} volume:{}",
@@ -272,21 +245,6 @@ public class VolumeManagerImpl implements VolumeManager {
     }
   }
 
-  @Override
-  public void applySetOwner(String oldOwner, VolumeList oldOwnerVolumeList,
-      VolumeList newOwnerVolumeList, OmVolumeArgs newOwnerVolumeArgs)
-      throws IOException {
-    try {
-      setOwnerCommitToDB(oldOwnerVolumeList, newOwnerVolumeList,
-          newOwnerVolumeArgs, oldOwner);
-    } catch (IOException ex) {
-      LOG.error("Changing volume ownership failed for user:{} volume:{}",
-          newOwnerVolumeArgs.getOwnerName(), newOwnerVolumeArgs.getVolume(),
-          ex);
-      throw ex;
-    }
-  }
-
 
   private void setOwnerCommitToDB(VolumeList oldOwnerVolumeList,
       VolumeList newOwnerVolumeList, OmVolumeArgs newOwnerVolumeArgs,
@@ -318,11 +276,10 @@ public class VolumeManagerImpl implements VolumeManager {
    * @param volume - Name of the volume.
    * @param quota - Quota in bytes.
    *
-   * @return OmVolumeArgs
    * @throws IOException
    */
   @Override
-  public OmVolumeArgs setQuota(String volume, long quota) throws IOException {
+  public void setQuota(String volume, long quota) throws IOException {
     Preconditions.checkNotNull(volume);
     metadataManager.getLock().acquireLock(VOLUME_LOCK, volume);
     try {
@@ -336,13 +293,9 @@ public class VolumeManagerImpl implements VolumeManager {
 
       Preconditions.checkState(volume.equals(volumeArgs.getVolume()));
 
-
       volumeArgs.setQuotaInBytes(quota);
 
-      if (!isRatisEnabled) {
-        metadataManager.getVolumeTable().put(dbVolumeKey, volumeArgs);
-      }
-      return volumeArgs;
+      metadataManager.getVolumeTable().put(dbVolumeKey, volumeArgs);
     } catch (IOException ex) {
       if (!(ex instanceof OMException)) {
         LOG.error("Changing volume quota failed for volume:{} quota:{}", volume,
@@ -354,19 +307,6 @@ public class VolumeManagerImpl implements VolumeManager {
     }
   }
 
-  @Override
-  public void applySetQuota(OmVolumeArgs omVolumeArgs) throws IOException {
-    try {
-      String dbVolumeKey = metadataManager.getVolumeKey(
-          omVolumeArgs.getVolume());
-      metadataManager.getVolumeTable().put(dbVolumeKey, omVolumeArgs);
-    } catch (IOException ex) {
-      LOG.error("Changing volume quota failed for volume:{} quota:{}",
-          omVolumeArgs.getVolume(), omVolumeArgs.getQuotaInBytes(), ex);
-      throw ex;
-    }
-  }
-
   /**
    * Gets the volume information.
    * @param volume - Volume name.
@@ -402,12 +342,10 @@ public class VolumeManagerImpl implements VolumeManager {
    * Deletes an existing empty volume.
    *
    * @param volume - Name of the volume.
-   *
-   * @return OmDeleteVolumeResponse
    * @throws IOException
    */
   @Override
-  public OmDeleteVolumeResponse deleteVolume(String volume) throws IOException {
+  public void deleteVolume(String volume) throws IOException {
     Preconditions.checkNotNull(volume);
     String owner = null;
     boolean acquiredUserLock = false;
@@ -435,11 +373,8 @@ public class VolumeManagerImpl implements VolumeManager {
       VolumeList newVolumeList = delVolumeFromOwnerList(volume,
           volumeArgs.getOwnerName());
 
-      if (!isRatisEnabled) {
-        deleteVolumeCommitToDB(newVolumeList,
-            volume, owner);
-      }
-      return new OmDeleteVolumeResponse(volume, owner, newVolumeList);
+
+      deleteVolumeCommitToDB(newVolumeList, volume, owner);
     } catch (IOException ex) {
       if (!(ex instanceof OMException)) {
         LOG.error("Delete volume failed for volume:{}", volume, ex);
@@ -454,17 +389,6 @@ public class VolumeManagerImpl implements VolumeManager {
     }
   }
 
-  @Override
-  public void applyDeleteVolume(String volume, String owner,
-      VolumeList newVolumeList) throws IOException {
-    try {
-      deleteVolumeCommitToDB(newVolumeList, volume, owner);
-    } catch (IOException ex) {
-      LOG.error("Delete volume failed for volume:{}", volume,
-          ex);
-      throw ex;
-    }
-  }
 
   private void deleteVolumeCommitToDB(VolumeList newVolumeList,
       String volume, String owner) throws IOException {

+ 1 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java

@@ -175,6 +175,7 @@ public class OzoneManagerDoubleBuffer {
     omMetadataManager.getOpenKeyTable().cleanupCache(lastRatisTransactionIndex);
     omMetadataManager.getKeyTable().cleanupCache(lastRatisTransactionIndex);
     omMetadataManager.getDeletedTable().cleanupCache(lastRatisTransactionIndex);
+    omMetadataManager.getS3Table().cleanupCache(lastRatisTransactionIndex);
   }
 
   /**

+ 3 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/utils/OzoneManagerRatisUtils.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.ozone.om.request.key.OMKeyCreateRequest;
 import org.apache.hadoop.ozone.om.request.key.OMKeyDeleteRequest;
 import org.apache.hadoop.ozone.om.request.key.OMKeyPurgeRequest;
 import org.apache.hadoop.ozone.om.request.key.OMKeyRenameRequest;
+import org.apache.hadoop.ozone.om.request.s3.bucket.S3BucketCreateRequest;
 import org.apache.hadoop.ozone.om.request.volume.OMVolumeCreateRequest;
 import org.apache.hadoop.ozone.om.request.volume.OMVolumeDeleteRequest;
 import org.apache.hadoop.ozone.om.request.volume.OMVolumeSetOwnerRequest;
@@ -99,6 +100,8 @@ public final class OzoneManagerRatisUtils {
       return new OMFileCreateRequest(omRequest);
     case PurgeKeys:
       return new OMKeyPurgeRequest(omRequest);
+    case CreateS3Bucket:
+      return new S3BucketCreateRequest(omRequest);
     default:
       // TODO: will update once all request types are implemented.
       return null;

+ 11 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java

@@ -178,6 +178,17 @@ public class OMKeyCommitRequest extends OMKeyRequest {
     // return response after releasing lock.
     if (exception == null) {
       omResponse.setCommitKeyResponse(CommitKeyResponse.newBuilder().build());
+
+      // Once the key is committed it becomes visible in Ozone, so the key
+      // count is incremented here.
+      // A key can have multiple versions, so the count is incremented only
+      // for the first version. Versioning of keys is not fully supported
+      // yet, so this can be revisited later.
+
+      if (omKeyInfo.getKeyLocationVersions().size() == 1) {
+        omMetrics.incNumKeys();
+      }
+
       return new OMKeyCommitResponse(omKeyInfo, commitKeyRequest.getClientID(),
           omResponse.build());
     } else {

+ 361 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/bucket/S3BucketCreateRequest.java

@@ -0,0 +1,361 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om.request.s3.bucket;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Optional;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hdds.protocol.StorageType;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.audit.OMAction;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.OMMetrics;
+import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.ozone.om.exceptions.OMException;
+import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
+import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
+import org.apache.hadoop.ozone.om.request.volume.OMVolumeRequest;
+import org.apache.hadoop.ozone.om.response.OMClientResponse;
+import org.apache.hadoop.ozone.om.response.bucket.OMBucketCreateResponse;
+import org.apache.hadoop.ozone.om.response.s3.bucket.S3BucketCreateResponse;
+import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .OMRequest;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .OMResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .S3CreateBucketRequest;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .S3CreateBucketResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .S3CreateVolumeInfo;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .VolumeList;
+import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer;
+import org.apache.hadoop.ozone.security.acl.OzoneObj;
+import org.apache.hadoop.util.Time;
+import org.apache.hadoop.utils.db.cache.CacheKey;
+import org.apache.hadoop.utils.db.cache.CacheValue;
+
+
+import static org.apache.hadoop.ozone.OzoneConsts.OM_S3_VOLUME_PREFIX;
+import static org.apache.hadoop.ozone.OzoneConsts.S3_BUCKET_MAX_LENGTH;
+import static org.apache.hadoop.ozone.OzoneConsts.S3_BUCKET_MIN_LENGTH;
+import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.BUCKET_LOCK;
+import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.S3_BUCKET_LOCK;
+import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.USER_LOCK;
+import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.VOLUME_LOCK;
+
+/**
+ * Handles S3 Bucket create request.
+ */
+public class S3BucketCreateRequest extends OMVolumeRequest {
+
+  private static final String S3_ADMIN_NAME = "OzoneS3Manager";
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(S3BucketCreateRequest.class);
+
+  public S3BucketCreateRequest(OMRequest omRequest) {
+    super(omRequest);
+  }
+
+  @Override
+  public OMRequest preExecute(OzoneManager ozoneManager) throws IOException {
+    S3CreateBucketRequest s3CreateBucketRequest =
+        getOmRequest().getCreateS3BucketRequest();
+    Preconditions.checkNotNull(s3CreateBucketRequest);
+
+    S3CreateBucketRequest.Builder newS3CreateBucketRequest =
+        s3CreateBucketRequest.toBuilder().setS3CreateVolumeInfo(
+            S3CreateVolumeInfo.newBuilder().setCreationTime(Time.now()));
+
+    // TODO: Do we need to enforce the bucket rules in this code path?
+    // https://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
+
+    // For now, only the length is checked.
+    int bucketLength = s3CreateBucketRequest.getS3Bucketname().length();
+    if (bucketLength < S3_BUCKET_MIN_LENGTH ||
+        bucketLength >= S3_BUCKET_MAX_LENGTH) {
+      throw new OMException("S3BucketName must be at least 3 and not more " +
+          "than 63 characters long",
+          OMException.ResultCodes.S3_BUCKET_INVALID_LENGTH);
+    }
+
+    return getOmRequest().toBuilder()
+        .setCreateS3BucketRequest(newS3CreateBucketRequest)
+        .setUserInfo(getUserInfo()).build();
+  }
+
+  @Override
+  public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
+      long transactionLogIndex) {
+
+    S3CreateBucketRequest s3CreateBucketRequest =
+        getOmRequest().getCreateS3BucketRequest();
+
+    String userName = s3CreateBucketRequest.getUserName();
+    String s3BucketName = s3CreateBucketRequest.getS3Bucketname();
+
+    OMResponse.Builder omResponse = OMResponse.newBuilder().setCmdType(
+        OzoneManagerProtocolProtos.Type.CreateS3Bucket).setStatus(
+        OzoneManagerProtocolProtos.Status.OK).setSuccess(true);
+
+    OMMetrics omMetrics = ozoneManager.getMetrics();
+    omMetrics.incNumS3BucketCreates();
+
+    // When an s3 bucket is created, we internally create an ozone volume and
+    // an ozone bucket.
+
+    // The ozone volume name is generated from userName by calling
+    // formatOzoneVolumeName.
+
+    // The ozone bucket name is the same as the s3 bucket name.
+    // S3 bucket names are unique, so we create a mapping like s3BucketName ->
+    // ozoneVolume/ozoneBucket and add it to the s3 mapping table. If
+    // s3BucketName already exists in the mapping table, the bucket already
+    // exists; otherwise we go ahead and create it.
+    OMMetadataManager omMetadataManager = ozoneManager.getMetadataManager();
+    IOException exception = null;
+    VolumeList volumeList = null;
+    OmVolumeArgs omVolumeArgs = null;
+    OmBucketInfo omBucketInfo = null;
+
+    boolean volumeCreated = false;
+    boolean acquiredVolumeLock = false;
+    boolean acquiredUserLock = false;
+    boolean acquiredS3Lock = false;
+    String volumeName = formatOzoneVolumeName(userName);
+    try {
+      // check Acl
+      if (ozoneManager.getAclsEnabled()) {
+        checkAcls(ozoneManager, OzoneObj.ResourceType.BUCKET,
+            OzoneObj.StoreType.S3, IAccessAuthorizer.ACLType.CREATE, null,
+            s3BucketName, null);
+      }
+
+      acquiredS3Lock = omMetadataManager.getLock().acquireLock(S3_BUCKET_LOCK,
+          s3BucketName);
+
+      // First check if this s3Bucket exists
+      if (omMetadataManager.getS3Table().isExist(s3BucketName)) {
+        throw new OMException("S3Bucket " + s3BucketName + " already exists",
+            OMException.ResultCodes.S3_BUCKET_ALREADY_EXISTS);
+      }
+
+      try {
+        acquiredVolumeLock =
+            omMetadataManager.getLock().acquireLock(VOLUME_LOCK, volumeName);
+        acquiredUserLock = omMetadataManager.getLock().acquireLock(USER_LOCK,
+            userName);
+        // Check if the volume exists; if it does not, create the
+        // ozone volume.
+        String volumeKey = omMetadataManager.getVolumeKey(volumeName);
+        if (!omMetadataManager.getVolumeTable().isExist(volumeKey)) {
+          omVolumeArgs = createOmVolumeArgs(volumeName, userName,
+              s3CreateBucketRequest.getS3CreateVolumeInfo()
+                  .getCreationTime());
+          volumeList = omMetadataManager.getUserTable().get(
+              omMetadataManager.getUserKey(userName));
+          volumeList = addVolumeToOwnerList(volumeList,
+              volumeName, userName, ozoneManager.getMaxUserVolumeCount());
+          createVolume(omMetadataManager, omVolumeArgs, volumeList, volumeKey,
+              omMetadataManager.getUserKey(userName), transactionLogIndex);
+          volumeCreated = true;
+        }
+      } finally {
+        if (acquiredUserLock) {
+          omMetadataManager.getLock().releaseLock(USER_LOCK, userName);
+        }
+        if (acquiredVolumeLock) {
+          omMetadataManager.getLock().releaseLock(VOLUME_LOCK, volumeName);
+        }
+      }
+
+      // Check if the ozone bucket exists; if it does not, create the ozone
+      // bucket.
+      omBucketInfo = createBucket(omMetadataManager, volumeName, s3BucketName,
+          s3CreateBucketRequest.getS3CreateVolumeInfo().getCreationTime(),
+          transactionLogIndex);
+
+      // Now finally add it to s3 table cache.
+      omMetadataManager.getS3Table().addCacheEntry(
+          new CacheKey<>(s3BucketName), new CacheValue<>(
+              Optional.of(formatS3MappingName(volumeName, s3BucketName)),
+              transactionLogIndex));
+    } catch (IOException ex) {
+      exception = ex;
+    } finally {
+      if (acquiredS3Lock) {
+        omMetadataManager.getLock().releaseLock(S3_BUCKET_LOCK, s3BucketName);
+      }
+    }
+
+    // Performing audit logging outside of the lock.
+    auditLog(ozoneManager.getAuditLogger(),
+        buildAuditMessage(OMAction.CREATE_S3_BUCKET,
+            buildAuditMap(userName, s3BucketName), exception,
+            getOmRequest().getUserInfo()));
+
+    if (exception == null) {
+      LOG.debug("S3Bucket is successfully created for userName: {}, " +
+          "s3BucketName {}, volumeName {}", userName, s3BucketName, volumeName);
+      OMVolumeCreateResponse omVolumeCreateResponse = null;
+      if (volumeCreated) {
+        omMetrics.incNumVolumes();
+        omVolumeCreateResponse = new OMVolumeCreateResponse(omVolumeArgs,
+            volumeList, omResponse.build());
+      }
+
+      omMetrics.incNumBuckets();
+      OMBucketCreateResponse omBucketCreateResponse =
+          new OMBucketCreateResponse(omBucketInfo, omResponse.build());
+      omMetrics.incNumS3Buckets();
+      return new S3BucketCreateResponse(omVolumeCreateResponse,
+          omBucketCreateResponse, s3BucketName,
+          formatS3MappingName(volumeName, s3BucketName),
+          omResponse.setCreateS3BucketResponse(
+              S3CreateBucketResponse.newBuilder()).build());
+    } else {
+      LOG.error("S3Bucket Creation Failed for userName: {}, s3BucketName {}, " +
+          "VolumeName {}", userName, s3BucketName, volumeName);
+      omMetrics.incNumS3BucketCreateFails();
+      return new S3BucketCreateResponse(null, null, null, null,
+          createErrorOMResponse(omResponse, exception));
+    }
+  }
+
+
+  private OmBucketInfo createBucket(OMMetadataManager omMetadataManager,
+      String volumeName, String s3BucketName, long creationTime,
+      long transactionLogIndex) throws IOException {
+    // Check if the ozone bucket exists; if it does not, create the ozone
+    // bucket.
+    boolean acquireBucketLock = false;
+    OmBucketInfo omBucketInfo = null;
+    try {
+      acquireBucketLock =
+          omMetadataManager.getLock().acquireLock(BUCKET_LOCK, volumeName,
+              s3BucketName);
+      String bucketKey = omMetadataManager.getBucketKey(volumeName,
+          s3BucketName);
+      if (!omMetadataManager.getBucketTable().isExist(bucketKey)) {
+        omBucketInfo = createOmBucketInfo(volumeName, s3BucketName,
+            creationTime);
+        // Add to bucket table cache.
+        omMetadataManager.getBucketTable().addCacheEntry(
+            new CacheKey<>(bucketKey),
+            new CacheValue<>(Optional.of(omBucketInfo), transactionLogIndex));
+      } else {
+        // This can happen when an ozone bucket already exists in the
+        // volume, but it is not an s3 bucket.
+        throw new OMException("Bucket " + s3BucketName + " already exists",
+            OMException.ResultCodes.BUCKET_ALREADY_EXISTS);
+      }
+    } finally {
+      if (acquireBucketLock) {
+        omMetadataManager.getLock().releaseLock(BUCKET_LOCK, volumeName,
+            s3BucketName);
+      }
+    }
+    return omBucketInfo;
+  }
+
+  /**
+   * Generate Ozone volume name from userName.
+   * @param userName
+   * @return volume name
+   */
+  @VisibleForTesting
+  public static String formatOzoneVolumeName(String userName) {
+    return String.format(OM_S3_VOLUME_PREFIX + "%s", userName);
+  }
+
+  /**
+   * Generate S3Mapping for provided volume and bucket. This information will
+   * be persisted in s3 table in OM DB.
+   * @param volumeName
+   * @param bucketName
+   * @return s3Mapping
+   */
+  @VisibleForTesting
+  public static String formatS3MappingName(String volumeName,
+      String bucketName) {
+    return String.format("%s" + OzoneConsts.OM_KEY_PREFIX + "%s", volumeName,
+        bucketName);
+  }
+
+  /**
+   * Create {@link OmVolumeArgs} which needs to be persisted in volume table
+   * in OM DB.
+   * @param volumeName
+   * @param userName
+   * @param creationTime
+   * @return {@link OmVolumeArgs}
+   */
+  private OmVolumeArgs createOmVolumeArgs(String volumeName, String userName,
+      long creationTime) {
+    return OmVolumeArgs.newBuilder()
+        .setAdminName(S3_ADMIN_NAME).setVolume(volumeName)
+        .setQuotaInBytes(OzoneConsts.MAX_QUOTA_IN_BYTES)
+        .setOwnerName(userName)
+        .setCreationTime(creationTime).build();
+  }
+
+  /**
+   * Create {@link OmBucketInfo} which needs to be persisted in to bucket table
+   * in OM DB.
+   * @param volumeName
+   * @param s3BucketName
+   * @param creationTime
+   * @return {@link OmBucketInfo}
+   */
+  private OmBucketInfo createOmBucketInfo(String volumeName,
+      String s3BucketName, long creationTime) {
+    //TODO: Now S3Bucket API takes only bucketName as param. In future if we
+    // support some configurable options we need to fix this.
+    return OmBucketInfo.newBuilder().setVolumeName(volumeName)
+        .setBucketName(s3BucketName).setIsVersionEnabled(Boolean.FALSE)
+        .setStorageType(StorageType.DEFAULT).setCreationTime(creationTime)
+        .build();
+  }
+
+  /**
+   * Build auditMap.
+   * @param userName
+   * @param s3BucketName
+   * @return auditMap
+   */
+  private Map<String, String> buildAuditMap(String userName,
+      String s3BucketName) {
+    Map<String, String> auditMap = new HashMap<>();
+    auditMap.put(OzoneConsts.USERNAME, userName);
+    auditMap.put(OzoneConsts.S3_BUCKET, s3BucketName);
+    return auditMap;
+  }
+}
+

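The two naming helpers above determine the internally created volume name and the value stored in the s3 mapping table. A quick illustration of what they produce for user "hive" and bucket "reports" (the expected strings assume OM_S3_VOLUME_PREFIX is "s3" and OM_KEY_PREFIX is "/", as defined in OzoneConsts):

    import static org.apache.hadoop.ozone.om.request.s3.bucket.S3BucketCreateRequest.formatOzoneVolumeName;
    import static org.apache.hadoop.ozone.om.request.s3.bucket.S3BucketCreateRequest.formatS3MappingName;

    /** Sketch only: volume name and s3 mapping produced for a bucket create. */
    public class S3NamingSketch {
      public static void main(String[] args) {
        String volume = formatOzoneVolumeName("hive");             // "s3hive" (assumed prefix)
        String mapping = formatS3MappingName(volume, "reports");   // "s3hive/reports"
        System.out.println(volume + " -> " + mapping);
      }
    }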
+ 23 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/s3/bucket/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * Package contains classes related to s3 bucket requests.
+ */
+package org.apache.hadoop.ozone.om.request.s3.bucket;

+ 12 - 24
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeCreateRequest.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.ozone.om.request.volume;
 
 import java.io.IOException;
 
-import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -29,14 +28,11 @@ import org.apache.hadoop.ozone.audit.AuditLogger;
 import org.apache.hadoop.ozone.audit.OMAction;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
-import org.apache.hadoop.utils.db.cache.CacheKey;
-import org.apache.hadoop.utils.db.cache.CacheValue;
 import org.apache.hadoop.ozone.om.OMMetrics;
 import org.apache.hadoop.ozone.om.OMMetadataManager;
 import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.request.OMClientRequest;
 import org.apache.hadoop.ozone.om.response.OMClientResponse;
 import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer;
 import org.apache.hadoop.ozone.security.acl.OzoneObj;
@@ -60,8 +56,7 @@ import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.USER_LOC
 /**
  * Handles volume create request.
  */
-public class OMVolumeCreateRequest extends OMClientRequest
-    implements OMVolumeRequest {
+public class OMVolumeCreateRequest extends OMVolumeRequest {
   private static final Logger LOG =
       LoggerFactory.getLogger(OMVolumeCreateRequest.class);
 
@@ -132,10 +127,6 @@ public class OMVolumeCreateRequest extends OMClientRequest
           createErrorOMResponse(omResponse, ex));
     }
 
-
-
-    String dbUserKey = omMetadataManager.getUserKey(owner);
-    String dbVolumeKey = omMetadataManager.getVolumeKey(volume);
     VolumeList volumeList = null;
     boolean acquiredUserLock = false;
     IOException exception = null;
@@ -145,28 +136,25 @@ public class OMVolumeCreateRequest extends OMClientRequest
     try {
       acquiredUserLock = omMetadataManager.getLock().acquireLock(USER_LOCK,
           owner);
+      String dbVolumeKey = omMetadataManager.getVolumeKey(volume);
+
       OmVolumeArgs dbVolumeArgs =
           omMetadataManager.getVolumeTable().get(dbVolumeKey);
 
-      // Validation: Check if volume already exists
-      if (dbVolumeArgs != null) {
+      if (dbVolumeArgs == null) {
+        String dbUserKey = omMetadataManager.getUserKey(owner);
+        volumeList = omMetadataManager.getUserTable().get(dbUserKey);
+        volumeList = addVolumeToOwnerList(volumeList, volume, owner,
+            ozoneManager.getMaxUserVolumeCount());
+        createVolume(omMetadataManager, omVolumeArgs, volumeList, dbVolumeKey,
+              dbUserKey, transactionLogIndex);
+        LOG.debug("volume:{} successfully created", omVolumeArgs.getVolume());
+      } else {
         LOG.debug("volume:{} already exists", omVolumeArgs.getVolume());
         throw new OMException("Volume already exists",
             OMException.ResultCodes.VOLUME_ALREADY_EXISTS);
       }
 
-      volumeList = omMetadataManager.getUserTable().get(dbUserKey);
-      volumeList = addVolumeToOwnerList(volumeList,
-          volume, owner, ozoneManager.getMaxUserVolumeCount());
-
-      // Update cache: Update user and volume cache.
-      omMetadataManager.getUserTable().addCacheEntry(new CacheKey<>(dbUserKey),
-          new CacheValue<>(Optional.of(volumeList), transactionLogIndex));
-
-      omMetadataManager.getVolumeTable().addCacheEntry(
-          new CacheKey<>(dbVolumeKey),
-          new CacheValue<>(Optional.of(omVolumeArgs), transactionLogIndex));
-
     } catch (IOException ex) {
       exception = ex;
     } finally {

+ 1 - 3
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeDeleteRequest.java

@@ -35,7 +35,6 @@ import org.apache.hadoop.ozone.om.OMMetrics;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
 import org.apache.hadoop.ozone.security.acl.IAccessAuthorizer;
 import org.apache.hadoop.ozone.security.acl.OzoneObj;
-import org.apache.hadoop.ozone.om.request.OMClientRequest;
 import org.apache.hadoop.ozone.om.response.OMClientResponse;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeDeleteResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
@@ -55,8 +54,7 @@ import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.USER_LOC
 /**
  * Handles volume delete request.
  */
-public class OMVolumeDeleteRequest extends OMClientRequest
-    implements OMVolumeRequest {
+public class OMVolumeDeleteRequest extends OMVolumeRequest {
 
   private static final Logger LOG =
       LoggerFactory.getLogger(OMVolumeDeleteRequest.class);

+ 40 - 5
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeRequest.java

@@ -18,9 +18,17 @@
 
 package org.apache.hadoop.ozone.om.request.volume;
 
+import com.google.common.base.Optional;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
+import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
+import org.apache.hadoop.ozone.om.request.OMClientRequest;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .OMRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .VolumeList;
+import org.apache.hadoop.utils.db.cache.CacheKey;
+import org.apache.hadoop.utils.db.cache.CacheValue;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -29,7 +37,11 @@ import java.util.List;
 /**
  * Defines common methods required for volume requests.
  */
-public interface OMVolumeRequest {
+public abstract class OMVolumeRequest extends OMClientRequest {
+
+  public OMVolumeRequest(OMRequest omRequest) {
+    super(omRequest);
+  }
 
   /**
    * Delete volume from user volume list. This method should be called after
@@ -40,7 +52,7 @@ public interface OMVolumeRequest {
    * @return VolumeList - updated volume list for the user.
    * @throws IOException
    */
-  default VolumeList delVolumeFromOwnerList(VolumeList volumeList,
+  protected VolumeList delVolumeFromOwnerList(VolumeList volumeList,
       String volume, String owner) throws IOException {
 
     List<String> prevVolList = new ArrayList<>();
@@ -72,9 +84,8 @@ public interface OMVolumeRequest {
    * @throws OMException - if user has volumes greater than
    * maxUserVolumeCount, an exception is thrown.
    */
-  default VolumeList addVolumeToOwnerList(
-      VolumeList volumeList, String volume, String owner,
-      long maxUserVolumeCount) throws IOException {
+  protected VolumeList addVolumeToOwnerList(VolumeList volumeList,
+      String volume, String owner, long maxUserVolumeCount) throws IOException {
 
     // Check the volume count
     if (volumeList != null &&
@@ -95,4 +106,28 @@ public interface OMVolumeRequest {
 
     return newVolList;
   }
+
+  /**
+   * Creates an Ozone volume by adding entries to the user and volume table
+   * caches. This method should be called after acquiring the user and
+   * volume locks.
+   * @param omMetadataManager
+   * @param omVolumeArgs
+   * @param volumeList
+   * @param dbVolumeKey
+   * @param dbUserKey
+   * @param transactionLogIndex
+   */
+  protected void createVolume(final OMMetadataManager omMetadataManager,
+      OmVolumeArgs omVolumeArgs, VolumeList volumeList, String dbVolumeKey,
+      String dbUserKey, long transactionLogIndex) {
+    // Update cache: Update user and volume cache.
+    omMetadataManager.getUserTable().addCacheEntry(new CacheKey<>(dbUserKey),
+        new CacheValue<>(Optional.of(volumeList), transactionLogIndex));
+
+    omMetadataManager.getVolumeTable().addCacheEntry(
+        new CacheKey<>(dbVolumeKey),
+        new CacheValue<>(Optional.of(omVolumeArgs), transactionLogIndex));
+  }
+
 }
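The new protected helpers are meant to be combined by the concrete volume requests: read the owner's current volume list, append the new volume with addVolumeToOwnerList, then call createVolume, which only populates the user and volume table caches (the RocksDB batch write happens later when the corresponding response is flushed by the double buffer). A minimal sketch of that usage, mirroring OMVolumeCreateRequest above (the class and variable names are illustrative):

    package org.apache.hadoop.ozone.om.request.volume;

    import java.io.IOException;

    import org.apache.hadoop.ozone.om.OMMetadataManager;
    import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
    import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest;
    import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.VolumeList;

    /** Sketch only: how a volume request subclass combines the new helpers. */
    abstract class VolumeCreateSketch extends OMVolumeRequest {

      VolumeCreateSketch(OMRequest omRequest) {
        super(omRequest);
      }

      void createInCache(OMMetadataManager omMetadataManager, OmVolumeArgs args,
          long maxUserVolumeCount, long transactionLogIndex) throws IOException {
        String dbUserKey = omMetadataManager.getUserKey(args.getOwnerName());
        String dbVolumeKey = omMetadataManager.getVolumeKey(args.getVolume());

        // Read the owner's current volume list and append the new volume to it.
        VolumeList volumeList = omMetadataManager.getUserTable().get(dbUserKey);
        volumeList = addVolumeToOwnerList(volumeList, args.getVolume(),
            args.getOwnerName(), maxUserVolumeCount);

        // Only the table caches are updated here; the DB batch write happens
        // when the corresponding response is applied by the double buffer.
        createVolume(omMetadataManager, args, volumeList, dbVolumeKey, dbUserKey,
            transactionLogIndex);
      }
    }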

+ 1 - 3
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetOwnerRequest.java

@@ -35,7 +35,6 @@ import org.apache.hadoop.ozone.om.OMMetrics;
 import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.request.OMClientRequest;
 import org.apache.hadoop.ozone.om.response.OMClientResponse;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeSetOwnerResponse;
@@ -58,8 +57,7 @@ import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.VOLUME_L
 /**
  * Handle set owner request for volume.
  */
-public class OMVolumeSetOwnerRequest extends OMClientRequest
-    implements OMVolumeRequest {
+public class OMVolumeSetOwnerRequest extends OMVolumeRequest {
   private static final Logger LOG =
       LoggerFactory.getLogger(OMVolumeSetOwnerRequest.class);
 

+ 1 - 2
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/volume/OMVolumeSetQuotaRequest.java

@@ -34,7 +34,6 @@ import org.apache.hadoop.ozone.om.OzoneManager;
 import org.apache.hadoop.ozone.om.OMMetrics;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.request.OMClientRequest;
 import org.apache.hadoop.ozone.om.response.OMClientResponse;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeSetQuotaResponse;
 import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
@@ -58,7 +57,7 @@ import static org.apache.hadoop.ozone.om.lock.OzoneManagerLock.Resource.VOLUME_L
 /**
  * Handles set Quota request for volume.
  */
-public class OMVolumeSetQuotaRequest extends OMClientRequest {
+public class OMVolumeSetQuotaRequest extends OMVolumeRequest {
   private static final Logger LOG =
       LoggerFactory.getLogger(OMVolumeSetQuotaRequest.class);
 

+ 73 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/bucket/S3BucketCreateResponse.java

@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.ozone.om.response.s3.bucket;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.io.IOException;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.response.OMClientResponse;
+import org.apache.hadoop.ozone.om.response.bucket.OMBucketCreateResponse;
+import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .OMResponse;
+import org.apache.hadoop.utils.db.BatchOperation;
+
+/**
+ * Response for S3Bucket create request.
+ */
+public class S3BucketCreateResponse extends OMClientResponse {
+
+  private OMVolumeCreateResponse omVolumeCreateResponse;
+  private OMBucketCreateResponse omBucketCreateResponse;
+  private String s3Bucket;
+  private String s3Mapping;
+
+  public S3BucketCreateResponse(
+      @Nullable OMVolumeCreateResponse omVolumeCreateResponse,
+      @Nullable OMBucketCreateResponse omBucketCreateResponse,
+      @Nullable String s3BucketName,
+      @Nullable String s3Mapping, @Nonnull OMResponse omResponse) {
+    super(omResponse);
+    this.omVolumeCreateResponse = omVolumeCreateResponse;
+    this.omBucketCreateResponse = omBucketCreateResponse;
+    this.s3Bucket = s3BucketName;
+    this.s3Mapping = s3Mapping;
+  }
+
+  @Override
+  public void addToDBBatch(OMMetadataManager omMetadataManager,
+      BatchOperation batchOperation) throws IOException {
+
+    if (getOMResponse().getStatus() == OzoneManagerProtocolProtos.Status.OK) {
+      if (omVolumeCreateResponse != null) {
+        omVolumeCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
+      }
+
+      Preconditions.checkState(omBucketCreateResponse != null);
+      omBucketCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
+
+      omMetadataManager.getS3Table().putWithBatch(batchOperation, s3Bucket,
+          s3Mapping);
+    }
+  }
+}
+

+ 24 - 0
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/response/s3/bucket/package-info.java

@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * Package contains classes related to s3 bucket responses.
+ */
+package org.apache.hadoop.ozone.om.response.s3.bucket;
+

+ 0 - 11
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerHARequestHandler.java

@@ -17,8 +17,6 @@
 
 package org.apache.hadoop.ozone.protocolPB;
 
-import java.io.IOException;
-
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .OMRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
@@ -29,15 +27,6 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
  */
 public interface OzoneManagerHARequestHandler extends RequestHandler {
 
-  /**
-   * Handle start Transaction Requests from OzoneManager StateMachine.
-   * @param omRequest
-   * @return OMRequest - New OM Request which will be applied during apply
-   * Transaction
-   * @throws IOException
-   */
-  OMRequest handleStartTransaction(OMRequest omRequest) throws IOException;
-
   /**
    * Handle Apply Transaction Requests from OzoneManager StateMachine.
    * @param omRequest

+ 1 - 168
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerHARequestHandlerImpl.java

@@ -17,41 +17,19 @@
 
 package org.apache.hadoop.ozone.protocolPB;
 
-import java.io.IOException;
-
 import org.apache.hadoop.ozone.om.OzoneManager;
-import org.apache.hadoop.ozone.om.helpers.OmDeleteVolumeResponse;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
-import org.apache.hadoop.ozone.om.helpers.OmVolumeOwnerChangeResponse;
 import org.apache.hadoop.ozone.om.ratis.OzoneManagerDoubleBuffer;
 import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils;
 import org.apache.hadoop.ozone.om.request.OMClientRequest;
 import org.apache.hadoop.ozone.om.response.OMClientResponse;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .CreateVolumeRequest;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .CreateVolumeResponse;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .DeleteVolumeRequest;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .DeleteVolumeResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .OMRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .OMResponse;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .SetVolumePropertyRequest;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .SetVolumePropertyResponse;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .Status;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .Type;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeInfo;
-import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
-    .VolumeList;
 
 /**
  * Command Handler for OM requests. OM State Machine calls this handler for
@@ -68,28 +46,6 @@ public class OzoneManagerHARequestHandlerImpl
     this.ozoneManagerDoubleBuffer = ozoneManagerDoubleBuffer;
   }
 
-  @Override
-  public OMRequest handleStartTransaction(OMRequest omRequest)
-      throws IOException {
-    LOG.debug("Received OMRequest: {}, ", omRequest);
-    Type cmdType = omRequest.getCmdType();
-    OMRequest newOmRequest = null;
-    switch (cmdType) {
-    case CreateVolume:
-      newOmRequest = handleCreateVolumeStart(omRequest);
-      break;
-    case SetVolumeProperty:
-      newOmRequest = handleSetVolumePropertyStart(omRequest);
-      break;
-    case DeleteVolume:
-      newOmRequest = handleDeleteVolumeStart(omRequest);
-      break;
-    default:
-      throw new IOException("Unrecognized Command Type:" + cmdType);
-    }
-    return newOmRequest;
-  }
-
 
   @Override
   public OMResponse handleApplyTransaction(OMRequest omRequest,
@@ -111,6 +67,7 @@ public class OzoneManagerHARequestHandlerImpl
     case CreateDirectory:
     case CreateFile:
     case PurgeKeys:
+    case CreateS3Bucket:
       //TODO: We don't need to pass transactionID; this will be removed when
       // all write requests are changed to the new model. And also we can
       // return OMClientResponse, then adding to doubleBuffer can be taken
@@ -135,128 +92,4 @@ public class OzoneManagerHARequestHandlerImpl
       return handle(omRequest);
     }
   }
-
-
-  private OMRequest handleCreateVolumeStart(OMRequest omRequest)
-      throws IOException {
-    VolumeInfo volumeInfo = omRequest.getCreateVolumeRequest().getVolumeInfo();
-    OzoneManagerProtocolProtos.VolumeList volumeList =
-        getOzoneManager().startCreateVolume(
-            OmVolumeArgs.getFromProtobuf(volumeInfo));
-
-    CreateVolumeRequest createVolumeRequest =
-        CreateVolumeRequest.newBuilder().setVolumeInfo(volumeInfo)
-            .setVolumeList(volumeList).build();
-    return omRequest.toBuilder().setCreateVolumeRequest(createVolumeRequest)
-        .build();
-  }
-
-  private CreateVolumeResponse handleCreateVolumeApply(OMRequest omRequest)
-      throws IOException {
-    OzoneManagerProtocolProtos.VolumeInfo volumeInfo =
-        omRequest.getCreateVolumeRequest().getVolumeInfo();
-    VolumeList volumeList =
-        omRequest.getCreateVolumeRequest().getVolumeList();
-    getOzoneManager().applyCreateVolume(
-        OmVolumeArgs.getFromProtobuf(volumeInfo),
-        volumeList);
-    return CreateVolumeResponse.newBuilder().build();
-  }
-
-  private OMRequest handleSetVolumePropertyStart(OMRequest omRequest)
-      throws IOException {
-    SetVolumePropertyRequest setVolumePropertyRequest =
-        omRequest.getSetVolumePropertyRequest();
-    String volume = setVolumePropertyRequest.getVolumeName();
-    OMRequest newOmRequest = null;
-    if (setVolumePropertyRequest.hasQuotaInBytes()) {
-      long quota = setVolumePropertyRequest.getQuotaInBytes();
-      OmVolumeArgs omVolumeArgs =
-          getOzoneManager().startSetQuota(volume, quota);
-      SetVolumePropertyRequest newSetVolumePropertyRequest =
-          SetVolumePropertyRequest.newBuilder().setVolumeName(volume)
-              .setVolumeInfo(omVolumeArgs.getProtobuf()).build();
-      newOmRequest =
-          omRequest.toBuilder().setSetVolumePropertyRequest(
-              newSetVolumePropertyRequest).build();
-    } else {
-      String owner = setVolumePropertyRequest.getOwnerName();
-      OmVolumeOwnerChangeResponse omVolumeOwnerChangeResponse =
-          getOzoneManager().startSetOwner(volume, owner);
-      // If volumeLists become large and as ratis writes the request to disk we
-      // might take more space if the lists become very big in size. We might
-      // need to revisit this if it becomes problem
-      SetVolumePropertyRequest newSetVolumePropertyRequest =
-          SetVolumePropertyRequest.newBuilder().setVolumeName(volume)
-              .setOwnerName(owner)
-              .setOriginalOwner(omVolumeOwnerChangeResponse.getOriginalOwner())
-              .setNewOwnerVolumeList(
-                  omVolumeOwnerChangeResponse.getNewOwnerVolumeList())
-              .setOldOwnerVolumeList(
-                  omVolumeOwnerChangeResponse.getOriginalOwnerVolumeList())
-              .setVolumeInfo(
-                  omVolumeOwnerChangeResponse.getNewOwnerVolumeArgs()
-                      .getProtobuf()).build();
-      newOmRequest =
-          omRequest.toBuilder().setSetVolumePropertyRequest(
-              newSetVolumePropertyRequest).build();
-    }
-    return newOmRequest;
-  }
-
-
-  private SetVolumePropertyResponse handleSetVolumePropertyApply(
-      OMRequest omRequest) throws IOException {
-    SetVolumePropertyRequest setVolumePropertyRequest =
-        omRequest.getSetVolumePropertyRequest();
-
-    if (setVolumePropertyRequest.hasQuotaInBytes()) {
-      getOzoneManager().applySetQuota(
-          OmVolumeArgs.getFromProtobuf(
-              setVolumePropertyRequest.getVolumeInfo()));
-    } else {
-      getOzoneManager().applySetOwner(
-          setVolumePropertyRequest.getOriginalOwner(),
-          setVolumePropertyRequest.getOldOwnerVolumeList(),
-          setVolumePropertyRequest.getNewOwnerVolumeList(),
-          OmVolumeArgs.getFromProtobuf(
-              setVolumePropertyRequest.getVolumeInfo()));
-    }
-    return SetVolumePropertyResponse.newBuilder().build();
-  }
-
-  private OMRequest handleDeleteVolumeStart(OMRequest omRequest)
-      throws IOException {
-    DeleteVolumeRequest deleteVolumeRequest =
-        omRequest.getDeleteVolumeRequest();
-
-    String volume = deleteVolumeRequest.getVolumeName();
-
-    OmDeleteVolumeResponse omDeleteVolumeResponse =
-        getOzoneManager().startDeleteVolume(volume);
-
-    DeleteVolumeRequest newDeleteVolumeRequest =
-        DeleteVolumeRequest.newBuilder().setVolumeList(
-            omDeleteVolumeResponse.getUpdatedVolumeList())
-            .setVolumeName(omDeleteVolumeResponse.getVolume())
-            .setOwner(omDeleteVolumeResponse.getOwner()).build();
-
-    return omRequest.toBuilder().setDeleteVolumeRequest(
-        newDeleteVolumeRequest).build();
-
-  }
-
-
-  private DeleteVolumeResponse handleDeleteVolumeApply(OMRequest omRequest)
-      throws IOException {
-
-    DeleteVolumeRequest deleteVolumeRequest =
-        omRequest.getDeleteVolumeRequest();
-
-    getOzoneManager().applyDeleteVolume(
-        deleteVolumeRequest.getVolumeName(), deleteVolumeRequest.getOwner(),
-        deleteVolumeRequest.getVolumeList());
-
-    return DeleteVolumeResponse.newBuilder().build();
-  }
 }
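
With the per-command start handlers removed, every write request that has been migrated to the new model (CreateS3Bucket now among them) takes the same apply path. A minimal sketch of that path, assuming the OzoneManagerRatisUtils.createClientRequest helper and the double buffer's add method already used by this handler; the method name below is illustrative only:

    private OMResponse applyNewModelRequest(OMRequest omRequest,
        long transactionLogIndex) {
      // Translate the wire request into its OMClientRequest implementation.
      OMClientRequest omClientRequest =
          OzoneManagerRatisUtils.createClientRequest(omRequest);
      // Apply the change to the in-memory table cache; the log index ties the
      // result to the Ratis transaction.
      OMClientResponse omClientResponse =
          omClientRequest.validateAndUpdateCache(getOzoneManager(),
              transactionLogIndex);
      // Queue the response so the double buffer can flush it to RocksDB later.
      ozoneManagerDoubleBuffer.add(omClientResponse, transactionLogIndex);
      return omClientResponse.getOMResponse();
    }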

+ 20 - 0
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/TestOMRequestUtils.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
 import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
+import org.apache.hadoop.ozone.om.request.s3.bucket.S3BucketCreateRequest;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
     .OMRequest;
@@ -146,6 +147,12 @@ public final class TestOMRequestUtils {
     addVolumeToDB(volumeName, UUID.randomUUID().toString(), omMetadataManager);
   }
 
+  public static void addS3BucketToDB(String volumeName, String s3BucketName,
+      OMMetadataManager omMetadataManager) throws Exception {
+    omMetadataManager.getS3Table().put(s3BucketName,
+        S3BucketCreateRequest.formatS3MappingName(volumeName, s3BucketName));
+  }
+
   /**
    * Add volume creation entry to OM DB.
    * @param volumeName
@@ -183,6 +190,19 @@ public final class TestOMRequestUtils {
         .setClientId(UUID.randomUUID().toString()).build();
   }
 
+  public static OzoneManagerProtocolProtos.OMRequest createS3BucketRequest(
+      String userName, String s3BucketName) {
+    OzoneManagerProtocolProtos.S3CreateBucketRequest request =
+        OzoneManagerProtocolProtos.S3CreateBucketRequest.newBuilder()
+            .setUserName(userName)
+            .setS3Bucketname(s3BucketName).build();
+
+    return OzoneManagerProtocolProtos.OMRequest.newBuilder()
+        .setCreateS3BucketRequest(request)
+        .setCmdType(OzoneManagerProtocolProtos.Type.CreateS3Bucket)
+        .setClientId(UUID.randomUUID().toString()).build();
+  }
+
   public static List< HddsProtos.KeyValue> getMetadataList() {
     List<HddsProtos.KeyValue> metadataList = new ArrayList<>();
     metadataList.add(HddsProtos.KeyValue.newBuilder().setKey("key1").setValue(
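
The two new helpers pair up in the S3 bucket request tests (see TestS3BucketCreateRequest below); a short usage sketch, with user and bucket names chosen only for illustration:

    // Build a CreateS3Bucket OMRequest, then pre-seed the S3 table so that a
    // later create attempt can hit S3_BUCKET_ALREADY_EXISTS.
    OMRequest omRequest =
        TestOMRequestUtils.createS3BucketRequest("user1", "bucket1");
    TestOMRequestUtils.addS3BucketToDB(
        S3BucketCreateRequest.formatOzoneVolumeName("user1"), "bucket1",
        omMetadataManager);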

+ 246 - 0
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/bucket/TestS3BucketCreateRequest.java

@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.ozone.om.request.s3.bucket;
+
+import java.util.UUID;
+
+import org.apache.commons.lang.RandomStringUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.mockito.Mockito;
+
+import org.apache.hadoop.ozone.om.exceptions.OMException;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.audit.AuditLogger;
+import org.apache.hadoop.ozone.audit.AuditMessage;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.OMMetrics;
+import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
+import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .OMRequest;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos
+    .OMResponse;
+import org.apache.hadoop.ozone.om.request.TestOMRequestUtils;
+import org.apache.hadoop.ozone.om.response.OMClientResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+
+import static org.junit.Assert.fail;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.when;
+
+/**
+ * Tests S3BucketCreateRequest class, which handles S3 CreateBucket request.
+ */
+public class TestS3BucketCreateRequest {
+
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
+
+  private OzoneManager ozoneManager;
+  private OMMetrics omMetrics;
+  private OMMetadataManager omMetadataManager;
+  private AuditLogger auditLogger;
+
+
+  @Before
+  public void setup() throws Exception {
+
+    ozoneManager = Mockito.mock(OzoneManager.class);
+    omMetrics = OMMetrics.create();
+    OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
+    ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS,
+        folder.newFolder().getAbsolutePath());
+    omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration);
+    when(ozoneManager.getMetrics()).thenReturn(omMetrics);
+    when(ozoneManager.getMetadataManager()).thenReturn(omMetadataManager);
+    auditLogger = Mockito.mock(AuditLogger.class);
+    when(ozoneManager.getAuditLogger()).thenReturn(auditLogger);
+    Mockito.doNothing().when(auditLogger).logWrite(any(AuditMessage.class));
+  }
+
+  @After
+  public void stop() {
+    omMetrics.unRegister();
+    Mockito.framework().clearInlineMocks();
+  }
+
+
+  @Test
+  public void testPreExecute() throws Exception {
+    String userName = UUID.randomUUID().toString();
+    String s3BucketName = UUID.randomUUID().toString();
+    doPreExecute(userName, s3BucketName);
+  }
+
+  @Test
+  public void testPreExecuteInvalidBucketLength() throws Exception {
+    String userName = UUID.randomUUID().toString();
+
+    // set bucket name which is less than 3 characters length
+    String s3BucketName = RandomStringUtils.randomAlphabetic(2);
+
+    try {
+      doPreExecute(userName, s3BucketName);
+      fail("testPreExecuteInvalidBucketLength failed");
+    } catch (OMException ex) {
+      GenericTestUtils.assertExceptionContains("S3_BUCKET_INVALID_LENGTH", ex);
+    }
+
+    // set bucket name which is greater than 63 characters length
+    s3BucketName = RandomStringUtils.randomAlphabetic(64);
+
+    try {
+      doPreExecute(userName, s3BucketName);
+      fail("testPreExecuteInvalidBucketLength failed");
+    } catch (OMException ex) {
+      GenericTestUtils.assertExceptionContains("S3_BUCKET_INVALID_LENGTH", ex);
+    }
+  }
+
+
+  @Test
+  public void testValidateAndUpdateCache() throws Exception {
+    String userName = UUID.randomUUID().toString();
+    String s3BucketName = UUID.randomUUID().toString();
+
+    S3BucketCreateRequest s3BucketCreateRequest = doPreExecute(userName,
+        s3BucketName);
+
+    doValidateAndUpdateCache(userName, s3BucketName,
+        s3BucketCreateRequest.getOmRequest());
+
+  }
+
+
+  @Test
+  public void testValidateAndUpdateCacheWithS3BucketAlreadyExists()
+      throws Exception {
+    String userName = UUID.randomUUID().toString();
+    String s3BucketName = UUID.randomUUID().toString();
+
+    TestOMRequestUtils.addS3BucketToDB(
+        S3BucketCreateRequest.formatOzoneVolumeName(userName), s3BucketName,
+        omMetadataManager);
+
+    S3BucketCreateRequest s3BucketCreateRequest =
+        doPreExecute(userName, s3BucketName);
+
+
+    // Try to create the same bucket again.
+    OMClientResponse omClientResponse =
+        s3BucketCreateRequest.validateAndUpdateCache(ozoneManager, 2);
+
+    OMResponse omResponse = omClientResponse.getOMResponse();
+    Assert.assertNotNull(omResponse.getCreateBucketResponse());
+    Assert.assertEquals(
+        OzoneManagerProtocolProtos.Status.S3_BUCKET_ALREADY_EXISTS,
+        omResponse.getStatus());
+  }
+
+  @Test
+  public void testValidateAndUpdateCacheWithBucketAlreadyExists()
+      throws Exception {
+    String userName = UUID.randomUUID().toString();
+    String s3BucketName = UUID.randomUUID().toString();
+
+    S3BucketCreateRequest s3BucketCreateRequest =
+        doPreExecute(userName, s3BucketName);
+
+    TestOMRequestUtils.addVolumeAndBucketToDB(
+        s3BucketCreateRequest.formatOzoneVolumeName(userName),
+        s3BucketName, omMetadataManager);
+
+
+    // Try to create the same bucket again.
+    OMClientResponse omClientResponse =
+        s3BucketCreateRequest.validateAndUpdateCache(ozoneManager, 2);
+
+    OMResponse omResponse = omClientResponse.getOMResponse();
+    Assert.assertNotNull(omResponse.getCreateBucketResponse());
+    Assert.assertEquals(OzoneManagerProtocolProtos.Status.BUCKET_ALREADY_EXISTS,
+        omResponse.getStatus());
+  }
+
+
+
+  private S3BucketCreateRequest doPreExecute(String userName,
+      String s3BucketName) throws Exception {
+    OMRequest originalRequest =
+        TestOMRequestUtils.createS3BucketRequest(userName, s3BucketName);
+
+    S3BucketCreateRequest s3BucketCreateRequest =
+        new S3BucketCreateRequest(originalRequest);
+
+    OMRequest modifiedRequest = s3BucketCreateRequest.preExecute(ozoneManager);
+    // Modification time will be set, so requests should not be equal.
+    Assert.assertNotEquals(originalRequest, modifiedRequest);
+    return new S3BucketCreateRequest(modifiedRequest);
+  }
+
+  private void doValidateAndUpdateCache(String userName, String s3BucketName,
+      OMRequest modifiedRequest) throws Exception {
+
+    // As we have not yet called validateAndUpdateCache, get() should
+    // return null.
+
+    Assert.assertNull(omMetadataManager.getS3Table().get(s3BucketName));
+    S3BucketCreateRequest s3BucketCreateRequest =
+        new S3BucketCreateRequest(modifiedRequest);
+
+
+    OMClientResponse omClientResponse =
+        s3BucketCreateRequest.validateAndUpdateCache(ozoneManager, 1);
+
+    // After validateAndUpdateCache, the entry should have been added to the
+    // cache, so get() should return a non-null value.
+
+    Assert.assertNotNull(omMetadataManager.getS3Table().get(s3BucketName));
+
+    String bucketKey =
+        omMetadataManager.getBucketKey(
+            s3BucketCreateRequest.formatOzoneVolumeName(userName),
+            s3BucketName);
+
+    // Check that the ozone bucket entry has been created.
+    Assert.assertNotNull(omMetadataManager.getBucketTable().get(bucketKey));
+
+    String volumeKey = omMetadataManager.getVolumeKey(
+        s3BucketCreateRequest.formatOzoneVolumeName(userName));
+
+    // Check that the volume entry has been created.
+    Assert.assertNotNull(omMetadataManager.getVolumeTable().get(volumeKey));
+
+    // check om response.
+    Assert.assertEquals(OzoneManagerProtocolProtos.Status.OK,
+        omClientResponse.getOMResponse().getStatus());
+    Assert.assertEquals(OzoneManagerProtocolProtos.Type.CreateS3Bucket,
+        omClientResponse.getOMResponse().getCmdType());
+
+  }
+
+}
+

+ 23 - 0
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/s3/bucket/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * Package contains test classes for s3 bucket requests.
+ */
+package org.apache.hadoop.ozone.om.request.s3.bucket;

+ 25 - 2
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeCreateRequest.java

@@ -133,7 +133,7 @@ public class TestOMVolumeCreateRequest {
     OMVolumeCreateRequest omVolumeCreateRequest =
         new OMVolumeCreateRequest(originalRequest);
 
-    omVolumeCreateRequest.preExecute(ozoneManager);
+    OMRequest modifiedRequest = omVolumeCreateRequest.preExecute(ozoneManager);
 
     String volumeKey = omMetadataManager.getVolumeKey(volumeName);
     String ownerKey = omMetadataManager.getUserKey(ownerName);
@@ -144,6 +144,8 @@ public class TestOMVolumeCreateRequest {
     Assert.assertNull(omMetadataManager.getVolumeTable().get(volumeKey));
     Assert.assertNull(omMetadataManager.getUserTable().get(ownerKey));
 
+    omVolumeCreateRequest = new OMVolumeCreateRequest(modifiedRequest);
+
     OMClientResponse omClientResponse =
         omVolumeCreateRequest.validateAndUpdateCache(ozoneManager, 1);
 
@@ -175,6 +177,25 @@ public class TestOMVolumeCreateRequest {
     Assert.assertNotNull(volumeList);
     Assert.assertEquals(volumeName, volumeList.getVolumeNames(0));
 
+    // Create another volume for the user.
+    originalRequest = createVolumeRequest("vol1", adminName,
+        ownerName);
+
+    omVolumeCreateRequest =
+        new OMVolumeCreateRequest(originalRequest);
+
+    modifiedRequest = omVolumeCreateRequest.preExecute(ozoneManager);
+
+    omClientResponse =
+        omVolumeCreateRequest.validateAndUpdateCache(ozoneManager, 2L);
+
+    Assert.assertEquals(OzoneManagerProtocolProtos.Status.OK,
+        omClientResponse.getOMResponse().getStatus());
+
+    Assert.assertTrue(omMetadataManager
+        .getUserTable().get(ownerKey).getVolumeNamesList().size() == 2);
+
+
   }
 
 
@@ -193,7 +214,9 @@ public class TestOMVolumeCreateRequest {
     OMVolumeCreateRequest omVolumeCreateRequest =
         new OMVolumeCreateRequest(originalRequest);
 
-    omVolumeCreateRequest.preExecute(ozoneManager);
+    OMRequest modifiedRequest = omVolumeCreateRequest.preExecute(ozoneManager);
+
+    omVolumeCreateRequest = new OMVolumeCreateRequest(modifiedRequest);
 
     OMClientResponse omClientResponse =
         omVolumeCreateRequest.validateAndUpdateCache(ozoneManager, 1);

+ 6 - 12
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/request/volume/TestOMVolumeDeleteRequest.java

@@ -90,8 +90,7 @@ public class TestOMVolumeDeleteRequest {
   @Test
   public void testPreExecute() throws Exception {
     String volumeName = UUID.randomUUID().toString();
-    String ownerName = UUID.randomUUID().toString();
-    OMRequest originalRequest = deleteVolumeRequest(volumeName, ownerName);
+    OMRequest originalRequest = deleteVolumeRequest(volumeName);
 
     OMVolumeDeleteRequest omVolumeDeleteRequest =
         new OMVolumeDeleteRequest(originalRequest);
@@ -106,7 +105,7 @@ public class TestOMVolumeDeleteRequest {
     String volumeName = UUID.randomUUID().toString();
     String ownerName = "user1";
 
-    OMRequest originalRequest = deleteVolumeRequest(volumeName, ownerName);
+    OMRequest originalRequest = deleteVolumeRequest(volumeName);
 
     OMVolumeDeleteRequest omVolumeDeleteRequest =
         new OMVolumeDeleteRequest(originalRequest);
@@ -147,9 +146,7 @@ public class TestOMVolumeDeleteRequest {
   public void testValidateAndUpdateCacheWithVolumeNotFound()
       throws Exception {
     String volumeName = UUID.randomUUID().toString();
-    String ownerName = "user1";
-
-    OMRequest originalRequest = deleteVolumeRequest(volumeName, ownerName);
+    OMRequest originalRequest = deleteVolumeRequest(volumeName);
 
     OMVolumeDeleteRequest omVolumeDeleteRequest =
         new OMVolumeDeleteRequest(originalRequest);
@@ -173,7 +170,7 @@ public class TestOMVolumeDeleteRequest {
     String volumeName = UUID.randomUUID().toString();
     String ownerName = "user1";
 
-    OMRequest originalRequest = deleteVolumeRequest(volumeName, ownerName);
+    OMRequest originalRequest = deleteVolumeRequest(volumeName);
 
     OMVolumeDeleteRequest omVolumeDeleteRequest =
         new OMVolumeDeleteRequest(originalRequest);
@@ -206,14 +203,11 @@ public class TestOMVolumeDeleteRequest {
   /**
    * Create OMRequest for delete volume.
    * @param volumeName
-   * @param ownerName
    * @return OMRequest
    */
-  private OMRequest deleteVolumeRequest(String volumeName,
-      String ownerName) {
+  private OMRequest deleteVolumeRequest(String volumeName) {
     DeleteVolumeRequest deleteVolumeRequest =
-        DeleteVolumeRequest.newBuilder().setVolumeName(volumeName)
-            .setOwner(ownerName).build();
+        DeleteVolumeRequest.newBuilder().setVolumeName(volumeName).build();
 
     return OMRequest.newBuilder().setClientId(UUID.randomUUID().toString())
         .setCmdType(OzoneManagerProtocolProtos.Type.DeleteVolume)

+ 119 - 0
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/bucket/TestS3BucketCreateResponse.java

@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.hadoop.ozone.om.response.s3.bucket;
+
+import java.util.UUID;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.om.OMConfigKeys;
+import org.apache.hadoop.ozone.om.OMMetadataManager;
+import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
+import org.apache.hadoop.ozone.om.helpers.OmBucketInfo;
+import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs;
+import org.apache.hadoop.ozone.om.request.s3.bucket.S3BucketCreateRequest;
+import org.apache.hadoop.ozone.om.response.TestOMResponseUtils;
+import org.apache.hadoop.ozone.om.response.bucket.OMBucketCreateResponse;
+import org.apache.hadoop.ozone.om.response.volume.OMVolumeCreateResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
+import org.apache.hadoop.util.Time;
+import org.apache.hadoop.utils.db.BatchOperation;
+
+/**
+ * Class to test S3BucketCreateResponse.
+ */
+public class TestS3BucketCreateResponse {
+
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
+
+  private OMMetadataManager omMetadataManager;
+  private BatchOperation batchOperation;
+
+  @Before
+  public void setup() throws Exception {
+    OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
+    ozoneConfiguration.set(OMConfigKeys.OZONE_OM_DB_DIRS,
+        folder.newFolder().getAbsolutePath());
+    omMetadataManager = new OmMetadataManagerImpl(ozoneConfiguration);
+    batchOperation = omMetadataManager.getStore().initBatchOperation();
+  }
+
+
+  @Test
+  public void testAddToDBBatch() throws Exception {
+    String userName = UUID.randomUUID().toString();
+    String s3BucketName = UUID.randomUUID().toString();
+
+    OzoneManagerProtocolProtos.OMResponse omResponse =
+        OzoneManagerProtocolProtos.OMResponse.newBuilder()
+            .setCmdType(OzoneManagerProtocolProtos.Type.CreateS3Bucket)
+            .setStatus(OzoneManagerProtocolProtos.Status.OK)
+            .setSuccess(true)
+            .setCreateS3BucketResponse(
+                OzoneManagerProtocolProtos.S3CreateBucketResponse
+                    .getDefaultInstance())
+            .build();
+
+    String volumeName = S3BucketCreateRequest.formatOzoneVolumeName(userName);
+    OzoneManagerProtocolProtos.VolumeList volumeList =
+        OzoneManagerProtocolProtos.VolumeList.newBuilder()
+            .addVolumeNames(volumeName).build();
+
+    OmVolumeArgs omVolumeArgs = OmVolumeArgs.newBuilder()
+        .setOwnerName(userName).setAdminName(userName)
+        .setVolume(volumeName).setCreationTime(Time.now()).build();
+
+    OMVolumeCreateResponse omVolumeCreateResponse =
+        new OMVolumeCreateResponse(omVolumeArgs, volumeList, omResponse);
+
+
+    OmBucketInfo omBucketInfo = TestOMResponseUtils.createBucket(
+        volumeName, s3BucketName);
+    OMBucketCreateResponse omBucketCreateResponse =
+        new OMBucketCreateResponse(omBucketInfo, omResponse);
+
+    String s3Mapping = S3BucketCreateRequest.formatS3MappingName(volumeName,
+        s3BucketName);
+    S3BucketCreateResponse s3BucketCreateResponse =
+        new S3BucketCreateResponse(omVolumeCreateResponse,
+            omBucketCreateResponse, s3BucketName, s3Mapping, omResponse);
+
+    s3BucketCreateResponse.addToDBBatch(omMetadataManager, batchOperation);
+
+    // Commit the batch manually and verify that addToDBBatch succeeded.
+    omMetadataManager.getStore().commitBatchOperation(batchOperation);
+
+    Assert.assertNotNull(omMetadataManager.getS3Table().get(s3BucketName));
+    Assert.assertEquals(s3Mapping,
+        omMetadataManager.getS3Table().get(s3BucketName));
+    Assert.assertNotNull(omMetadataManager.getVolumeTable().get(
+        omMetadataManager.getVolumeKey(volumeName)));
+    Assert.assertNotNull(omMetadataManager.getBucketTable().get(
+        omMetadataManager.getBucketKey(volumeName, s3BucketName)));
+
+  }
+}
+

+ 23 - 0
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/response/s3/bucket/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/**
+ * Package contains test classes for s3 bucket responses.
+ */
+package org.apache.hadoop.ozone.om.response.s3.bucket;

+ 2 - 39
hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java

@@ -563,51 +563,14 @@ public class BasicOzoneFileSystem extends FileSystem {
   }
 
   /**
-   * Check whether the path is valid and then create directories.
-   * Directory is represented using a key with no value.
-   * All the non-existent parent directories are also created.
+   * Creates a directory. Directory is represented using a key with no value.
    *
    * @param path directory path to be created
    * @return true if directory exists or created successfully.
    * @throws IOException
    */
   private boolean mkdir(Path path) throws IOException {
-    Path fPart = path;
-    Path prevfPart = null;
-    do {
-      LOG.trace("validating path:{}", fPart);
-      try {
-        FileStatus fileStatus = getFileStatus(fPart);
-        if (fileStatus.isDirectory()) {
-          // If path exists and a directory, exit
-          break;
-        } else {
-          // Found a file here, rollback and delete newly created directories
-          LOG.trace("Found a file with same name as directory, path:{}", fPart);
-          if (prevfPart != null) {
-            delete(prevfPart, true);
-          }
-          throw new FileAlreadyExistsException(String.format(
-              "Can't make directory for path '%s', it is a file.", fPart));
-        }
-      } catch (FileNotFoundException | OMException fnfe) {
-        LOG.trace("creating directory for fpart:{}", fPart);
-        String key = pathToKey(fPart);
-        String dirKey = addTrailingSlashIfNeeded(key);
-        if (!adapter.createDirectory(dirKey)) {
-          // Directory creation failed here,
-          // rollback and delete newly created directories
-          LOG.trace("Directory creation failed, path:{}", fPart);
-          if (prevfPart != null) {
-            delete(prevfPart, true);
-          }
-          return false;
-        }
-      }
-      prevfPart = fPart;
-      fPart = fPart.getParent();
-    } while (fPart != null);
-    return true;
+    return adapter.createDirectory(pathToKey(path));
   }
 
   @Override
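
The simplified mkdir issues a single adapter call; the representation it relies on is that a directory is nothing more than a value-less key ending in a slash, as the removed loop's helpers show. A small illustration using this class's own pathToKey and addTrailingSlashIfNeeded (the paths and resulting key strings are illustrative):

    // A directory such as "/a/b/c" maps to the value-less key "a/b/c/",
    // while a file keeps its plain key.
    String dirKey = addTrailingSlashIfNeeded(pathToKey(new Path("/a/b/c")));  // e.g. "a/b/c/"
    String fileKey = pathToKey(new Path("/a/b/c/file1"));                     // e.g. "a/b/c/file1"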

+ 7 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java

@@ -313,7 +313,13 @@ public class MiniYARNCluster extends CompositeService {
         YarnConfiguration.DEFAULT_TIMELINE_SERVICE_ENABLED) || enableAHS) {
         addService(new ApplicationHistoryServerWrapper());
     }
-    
+    // To ensure that any FileSystemNodeAttributeStore started by the RM always
+    // uses a unique path: if unset, force it under the test dir.
+    if (conf.get(YarnConfiguration.FS_NODE_ATTRIBUTE_STORE_ROOT_DIR) == null) {
+      File nodeAttrDir = new File(getTestWorkDir(), "nodeattributes");
+      conf.set(YarnConfiguration.FS_NODE_ATTRIBUTE_STORE_ROOT_DIR,
+          nodeAttrDir.getCanonicalPath());
+    }
     super.serviceInit(
         conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
   }
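
The new guard only fires when the key is unset, so a test that needs a specific node-attribute store location can still pin it before starting the cluster; a hedged sketch, with the directory path and cluster sizes chosen only for illustration:

    YarnConfiguration conf = new YarnConfiguration();
    // An explicit setting wins; MiniYARNCluster will not override it.
    conf.set(YarnConfiguration.FS_NODE_ATTRIBUTE_STORE_ROOT_DIR,
        "/tmp/my-test/nodeattributes");
    MiniYARNCluster cluster = new MiniYARNCluster("test", 1, 1, 1);
    cluster.init(conf);
    cluster.start();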

+ 4 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/YarnApplicationSecurity.md

@@ -114,7 +114,7 @@ supplied this way.
 
 This means you have a relatively similar workflow across secure and insecure clusters.
 
-1. Suring AM startup, log in to Kerberos.
+1. During AM startup, log in to Kerberos.
 A call to `UserGroupInformation.isSecurityEnabled()` will trigger this operation.
 
 1. Enumerate the current user's credentials, through a call of
@@ -144,7 +144,7 @@ than the AMRM and timeline tokens.
 
 Here are the different strategies
 
-1. Don't. Rely on the lifespan of the application being so short that token
+1. Don't rely on the lifespan of the application being so short that token
 renewal is not needed. For applications whose life can always be measured
 in minutes or tens of minutes, this is a viable strategy.
 
 This is what most YARN applications do.
 
 ### AM/RM Token Refresh
 
-The AM/RM token is renewed automatically; the AM pushes out a new token
+The AM/RM token is renewed automatically; the RM sends out a new token
 to the AM within an `allocate` message. Consult the `AMRMClientImpl` class
 to see the process. *Your AM code does not need to worry about this process*
 
@@ -191,7 +191,7 @@ token. Consult `UnmanagedAMLauncher` for the specifics.
 ### Identity on an insecure cluster: `HADOOP_USER_NAME`
 
 In an insecure cluster, the application will run as the identity of
-the account of the node manager, typically something such as `yarn`
+the account of the node manager, such as `yarn`
 or `mapred`. By default, the application will access HDFS
 as that user, with a different home directory, and with
 a different user identified in audit logs and on file system owner attributes.
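
An application that wants a specific identity on an insecure cluster can therefore export the variable in the container's launch environment; a minimal sketch, where the user name and command are illustrative:

    // Launch the container under a chosen HDFS identity on an insecure cluster.
    Map<String, String> env = new HashMap<>();
    env.put("HADOOP_USER_NAME", "datauser");
    ContainerLaunchContext ctx = ContainerLaunchContext.newInstance(
        Collections.emptyMap(),                  // no local resources
        env,
        Collections.singletonList("./run.sh"),   // container command
        null, null, null);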