فهرست منبع

HDFS-7147. Update archival storage user documentation. Contributed by Tsz Wo Nicholas Sze.

Haohui Mai 10 سال پیش
والد
کامیت
35d353e0f6

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -993,6 +993,9 @@ Release 2.6.0 - UNRELEASED
     HADOOP-11233. hadoop.security.kms.client.encrypted.key.cache.expiry
     HADOOP-11233. hadoop.security.kms.client.encrypted.key.cache.expiry
     property spelled wrong in core-default. (Stephen Chu via yliu) 
     property spelled wrong in core-default. (Stephen Chu via yliu) 
 
 
+    HDFS-7147. Update archival storage user documentation.
+    (Tsz Wo Nicholas Sze via wheat9)
+
     BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
     BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS
   
   
       HDFS-6387. HDFS CLI admin tool for creating & deleting an
       HDFS-6387. HDFS CLI admin tool for creating & deleting an

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockStoragePolicySuite.java

@@ -18,7 +18,9 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Lists;
+
 import org.apache.hadoop.fs.XAttr;
 import org.apache.hadoop.fs.XAttr;
 import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.XAttrHelper;
 import org.apache.hadoop.hdfs.XAttrHelper;
@@ -104,9 +106,11 @@ public class BlockStoragePolicySuite {
   }
   }
 
 
   public BlockStoragePolicy getPolicy(String policyName) {
   public BlockStoragePolicy getPolicy(String policyName) {
+    Preconditions.checkNotNull(policyName);
+
     if (policies != null) {
     if (policies != null) {
       for (BlockStoragePolicy policy : policies) {
       for (BlockStoragePolicy policy : policies) {
-        if (policy != null && policy.getName().equals(policyName)) {
+        if (policy != null && policy.getName().equalsIgnoreCase(policyName)) {
           return policy;
           return policy;
         }
         }
       }
       }

+ 0 - 118
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml

@@ -1,118 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-
-<!-- Do not modify this file directly.  Instead, copy entries that you wish -->
-<!-- to modify from this file into blockStoragePolicy-site.xml and change   -->
-<!-- there.  If blockStoragePolicy-site.xml does not exist, create it.      -->
-
-<configuration>
-<property>
-  <name>dfs.block.storage.policies</name>
-  <value>HOT:12, WARM:8, COLD:4</value>
-  <description>
-    A list of block storage policy names and IDs.  The syntax is
-
-      NAME_1:ID_1, NAME_2:ID_2, ..., NAME_n:ID_n
-
-    where ID is an integer in the range [1,15] and NAME is case insensitive.
-    The first element is the default policy.  Empty list is not allowed.
-  </description>
-</property>
-
-<!-- Block Storage Policy HOT:12 -->
-<property>
-  <name>dfs.block.storage.policy.12</name>
-  <value>DISK</value>
-  <description>
-    A list of storage types for storing the block replicas such as
-
-      STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n
-  
-    When creating a block, the i-th replica is stored using i-th storage type
-    for i less than or equal to n, and
-    the j-th replica is stored using n-th storage type for j greater than n.
-
-    Empty list is not allowed.
-
-    Examples:
-    DISK          : all replicas stored using DISK.
-    DISK, ARCHIVE : the first replica is stored using DISK and all the
-                    remaining replicas are stored using ARCHIVE.
-  </description>
-</property>
-
-<property>
-  <name>dfs.block.storage.policy.creation-fallback.12</name>
-  <value></value>
-  <description>
-    A list of storage types for creation fallback storage.
-
-      STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n
-  
-    When creating a block, if a particular storage type specified in the policy
-    is unavailable, the fallback STORAGE_TYPE_1 is used.  Further, if
-    STORAGE_TYPE_i is also unavailable, the fallback STORAGE_TYPE_(i+1) is used.
-    In case that all fallback storages are unavailabe, the block will be created
-    with number of replicas less than the specified replication factor.
-
-    An empty list indicates that there is no fallback storage.
-  </description>
-</property>
-
-<property>
-  <name>dfs.block.storage.policy.replication-fallback.12</name>
-  <value>ARCHIVE</value>
-  <description>
-    Similar to dfs.block.storage.policy.creation-fallback.x but for replication.
-  </description>
-</property>
-
-<!-- Block Storage Policy WARM:8 -->
-<property>
-  <name>dfs.block.storage.policy.8</name>
-  <value>DISK, ARCHIVE</value>
-</property>
-
-<property>
-  <name>dfs.block.storage.policy.creation-fallback.8</name>
-  <value>DISK, ARCHIVE</value>
-</property>
-
-<property>
-  <name>dfs.block.storage.policy.replication-fallback.8</name>
-  <value>DISK, ARCHIVE</value>
-</property>
-
-<!-- Block Storage Policy COLD:4 -->
-<property>
-  <name>dfs.block.storage.policy.4</name>
-  <value>ARCHIVE</value>
-</property>
-
-<property>
-  <name>dfs.block.storage.policy.creation-fallback.4</name>
-  <value></value>
-</property>
-
-<property>
-  <name>dfs.block.storage.policy.replication-fallback.4</name>
-  <value></value>
-</property>
-</configuration>

+ 70 - 139
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm

@@ -11,12 +11,12 @@
 ~~ limitations under the License. See accompanying LICENSE file.
 ~~ limitations under the License. See accompanying LICENSE file.
 
 
   ---
   ---
-  HDFS Archival Storage
+  Archival Storage, SSD & Memory
   ---
   ---
   ---
   ---
   ${maven.build.timestamp}
   ${maven.build.timestamp}
 
 
-HDFS Archival Storage
+Archival Storage, SSD & Memory
 
 
 %{toc|section=1|fromDepth=0}
 %{toc|section=1|fromDepth=0}
 
 
@@ -29,9 +29,13 @@ HDFS Archival Storage
   Adding more nodes to the cold storage can grow the storage independent of the compute capacity
   Adding more nodes to the cold storage can grow the storage independent of the compute capacity
   in the cluster.
   in the cluster.
 
 
+  The frameworks provided by Heterogeneous Storage and Archival Storage generalize the HDFS architecture
+  to include other kinds of storage media such as <SSD> and <memory>.
+  Users may choose to store their data in SSD or memory for better performance.
+
 * {Storage Types and Storage Policies}
 * {Storage Types and Storage Policies}
 
 
-** {Storage Types: DISK, SSD and ARCHIVE}
+** {Storage Types: ARCHIVE, DISK, SSD and RAM_DISK}
 
 
   The first phase of
   The first phase of
   {{{https://issues.apache.org/jira/browse/HDFS-2832}Heterogeneous Storage (HDFS-2832)}}
   {{{https://issues.apache.org/jira/browse/HDFS-2832}Heterogeneous Storage (HDFS-2832)}}
@@ -45,7 +49,9 @@ HDFS Archival Storage
   which has high storage density (petabyte of storage) but little compute power,
   which has high storage density (petabyte of storage) but little compute power,
   is added for supporting archival storage.
   is added for supporting archival storage.
 
 
-** {Storage Policies: Hot, Warm and Cold}
+  Another new storage type <RAM_DISK> is added for supporting writing single replica files in memory.
+
+** {Storage Policies: Hot, Warm, Cold, All_SSD, One_SSD and Lazy_Persist}
 
 
   A new concept of storage policies is introduced in order to allow files to be stored
   A new concept of storage policies is introduced in order to allow files to be stored
   in different storage types according to the storage policy.
   in different storage types according to the storage policy.
@@ -65,6 +71,14 @@ HDFS Archival Storage
                When a block is warm, some of its replicas are stored in DISK
                When a block is warm, some of its replicas are stored in DISK
                and the remaining replicas are stored in ARCHIVE.
                and the remaining replicas are stored in ARCHIVE.
 
 
+  * <<All_SSD>> - for storing all replicas in SSD.
+
+  * <<One_SSD>> - for storing one of the replicas in SSD.
+                  The remaining replicas are stored in DISK.
+
+  * <<Lazy_Persist>> - for writing blocks with single replica in memory.
+                       The replica is first written in RAM_DISK and then it is lazily persisted in DISK.
+
   []
   []
 
 
   More formally, a storage policy consists of the following fields:
   More formally, a storage policy consists of the following fields:
@@ -89,149 +103,54 @@ HDFS Archival Storage
 
 
   The following is a typical storage policy table.
   The following is a typical storage policy table.
 
 
-*--------+---------------+-------------------------+-----------------------+-----------------------+
-| <<Policy>> | <<Policy>>| <<Block Placement>>     | <<Fallback storages>> | <<Fallback storages>> |
-| <<ID>>     | <<Name>>  | <<(n\ replicas)>>      | <<for creation>>      | <<for replication>>   |
-*--------+---------------+-------------------------+-----------------------+-----------------------+
-| 12     | Hot (default) | DISK: <n>               | \<none\>              | ARCHIVE               |
-*--------+---------------+-------------------------+-----------------------+-----------------------+
-| 8      | Warm          | DISK: 1, ARCHIVE: <n>-1 | ARCHIVE, DISK         | ARCHIVE, DISK         |
-*--------+---------------+-------------------------+-----------------------+-----------------------+
-| 4      | Cold          | ARCHIVE: <n>            | \<none\>              | \<none\>              |
-*--------+---------------+-------------------------+-----------------------+-----------------------+
-
-  Note that cluster administrators may change the storage policy table
-  according to the characteristic of the cluster.
-  For example, in order to prevent losing archival data,
-  administrators may want to use DISK as fallback storage for replication in the Cold policy.
-  A drawback of such setting is that the DISK storages could be filled up with archival data.
-  As a result, the entire cluster may become full and cannot serve hot data anymore.
-
-** {Configurations}
-
-*** {Setting The List of All Storage Policies}
-
-  * <<dfs.block.storage.policies>>
-    - a list of block storage policy names and IDs.
-    The syntax is
-
-      NAME_1:ID_1, NAME_2:ID_2, ..., NAME_<n>:ID_<n>
-
-    where ID is an integer in the closed range [1,15] and NAME is case insensitive.
-    The first element is the <default policy>.  Empty list is not allowed.
-
-    The default value is shown below.
-
-+------------------------------------------+
-<property>
-  <name>dfs.block.storage.policies</name>
-  <value>HOT:12, WARM:8, COLD:4</value>
-</property>
-+------------------------------------------+
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| <<Policy>> | <<Policy>>| <<Block Placement>>      | <<Fallback storages>> | <<Fallback storages>> |
+| <<ID>>     | <<Name>>  | <<(n\ replicas)>>        | <<for creation>>      | <<for replication>>   |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| 15     | Lazy_Persist  | RAM_DISK: 1, DISK: <n>-1 | DISK                  | DISK                  |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| 12     | All_SSD       | SSD: <n>                 | DISK                  | DISK                  |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| 10     | One_SSD       | SSD: 1, DISK: <n>-1      | SSD, DISK             | SSD, DISK             |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| 7      | Hot (default) | DISK: <n>                | \<none\>              | ARCHIVE               |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| 5      | Warm          | DISK: 1, ARCHIVE: <n>-1  | ARCHIVE, DISK         | ARCHIVE, DISK         |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+| 2      | Cold          | ARCHIVE: <n>             | \<none\>              | \<none\>              |
+*--------+---------------+--------------------------+-----------------------+-----------------------+
+
+  Note that the Lazy_Persist policy is useful only for single replica blocks.
+  For blocks with more than one replica, all the replicas will be written to DISK
+  since writing only one of the replicas to RAM_DISK does not improve the overall performance.
+
+** {Storage Policy Resolution}
+
+  When a file or directory is created, its storage policy is <unspecified>.
+  The storage policy can be specified using
+  the "<<<{{{Set Storage Policy}dfsadmin -setStoragePolicy}}>>>" command.
+  The effective storage policy of a file or directory is resolved by the following rules.
+
+  [[1]] If the file or directory is specified with a storage policy, return it.
+
+  [[2]] For an unspecified file or directory,
+        if it is the root directory, return the <default storage policy>.
+        Otherwise, return its parent's effective storage policy.
 
 
   []
   []
 
 
-*** {Setting Storage Policy Details}
-
-  The following configuration properties are for setting the details of each storage policy,
-  where <<<\<ID\>>>> is the actual policy ID.
-
-  * <<dfs.block.storage.policy.\<ID\>>>
-    - a list of storage types for storing the block replicas.
-    The syntax is
-
-      STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_<n>
-  
-    When creating a block, the <i>-th replica is stored using <i>-th storage type
-    for <i> less than or equal to <n>, and
-    the <j>-th replica is stored using <n>-th storage type for <j> greater than <n>.
-
-    Empty list is not allowed.
-
-    Examples:
-
-+------------------------------------------+
-DISK          : all replicas stored using DISK.
-DISK, ARCHIVE : the first replica is stored using DISK and all the
-                remaining replicas are stored using ARCHIVE.
-+------------------------------------------+
-
-  * <<dfs.block.storage.policy.creation-fallback.\<ID\>>>
-    - a list of storage types for creation fallback storage.
-    The syntax is
+  The effective storage policy can be retrieved by
+  the "<<<{{{Get Storage Policy}dfsadmin -getStoragePolicy}}>>>" command.
 
 
-      STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n
-  
-    When creating a block, if a particular storage type specified in the policy
-    is unavailable, the fallback STORAGE_TYPE_1 is used.  Further, if
-    STORAGE_TYPE_<i> is also unavailable, the fallback STORAGE_TYPE_<(i+1)> is used.
-    In case all fallback storages are unavailable, the block will be created
-    with number of replicas less than the specified replication factor.
 
 
-    An empty list indicates that there is no fallback storage.
+** {Configuration}
 
 
-  * <<dfs.block.storage.policy.replication-fallback.\<ID\>>>
-    - a list of storage types for replication fallback storage.
-    The usage of this configuration property is similar to
-    <<<dfs.block.storage.policy.creation-fallback.\<ID\>>>>
-    except that it takes effect on replication but not block creation.
+  * <<dfs.storage.policy.enabled>>
+    - for enabling/disabling the storage policy feature.
+    The default value is <<<true>>>.
 
 
   []
   []
 
 
-  The following are the default configuration values for Hot, Warm and Cold storage policies.
-
-  * Block Storage Policy <<HOT:12>>
-
-+------------------------------------------+
-<property>
-  <name>dfs.block.storage.policy.12</name>
-  <value>DISK</value>
-</property>
-<property>
-  <name>dfs.block.storage.policy.creation-fallback.12</name>
-  <value></value>
-</property>
-<property>
-  <name>dfs.block.storage.policy.replication-fallback.12</name>
-  <value>ARCHIVE</value>
-</property>
-+------------------------------------------+
-
-  * Block Storage Policy <<WARM:8>>
-
-+------------------------------------------+
-<property>
-  <name>dfs.block.storage.policy.8</name>
-  <value>DISK, ARCHIVE</value>
-</property>
-<property>
-  <name>dfs.block.storage.policy.creation-fallback.8</name>
-  <value>DISK, ARCHIVE</value>
-</property>
-<property>
-  <name>dfs.block.storage.policy.replication-fallback.8</name>
-  <value>DISK, ARCHIVE</value>
-</property>
-+------------------------------------------+
-
-  * Block Storage Policy <<COLD:4>>
-
-+------------------------------------------+
-<property>
-  <name>dfs.block.storage.policy.4</name>
-  <value>ARCHIVE</value>
-</property>
-<property>
-  <name>dfs.block.storage.policy.creation-fallback.4</name>
-  <value></value>
-</property>
-<property>
-  <name>dfs.block.storage.policy.replication-fallback.4</name>
-  <value></value>
-</property>
-+------------------------------------------+
-
-  []
 
 
 * {Mover - A New Data Migration Tool}
 * {Mover - A New Data Migration Tool}
 
 
@@ -261,7 +180,19 @@ hdfs mover [-p <files/dirs> | -f <local file name>]
   []
   []
 
 
 
 
-* {<<<DFSAdmin>>> Commands}
+* {Storage Policy Commands}
+
+** {List Storage Policies}
+
+  List out all the storage policies.
+
+  * Command:
+
++------------------------------------------+
+hdfs storagepolicies
++------------------------------------------+
+
+  * Arguments: none.
 
 
 ** {Set Storage Policy}
 ** {Set Storage Policy}
 
 

+ 1 - 1
hadoop-project/src/site/site.xml

@@ -92,7 +92,7 @@
       <item name="Extended Attributes" href="hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html"/>
       <item name="Extended Attributes" href="hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html"/>
       <item name="Transparent Encryption" href="hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html"/>
       <item name="Transparent Encryption" href="hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html"/>
       <item name="HDFS Support for Multihoming" href="hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html"/>
       <item name="HDFS Support for Multihoming" href="hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html"/>
-      <item name="Archival Storage" href="hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html"/>
+      <item name="Archival Storage, SSD &amp; Memory" href="hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html"/>
     </menu>
     </menu>
 
 
     <menu name="MapReduce" inherit="top">
     <menu name="MapReduce" inherit="top">