Browse source code

AMBARI-7615. Ambari support for DataNode no longer running as root. (dlysnichenko)

Lisnichenko Dmitro 10 years ago
parent
commit
7da3a8e312

+ 9 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/package/scripts/params.py

@@ -146,6 +146,11 @@ fs_checkpoint_dir = config['configurations']['hdfs-site']['dfs.namenode.checkpoi
 dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
 data_dir_mount_file = config['configurations']['hadoop-env']['dfs.datanode.data.dir.mount.file']
 
+dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
+dfs_dn_http_addr = default('/configurations/hdfs-site/dfs.datanode.http.address', None)
+dfs_dn_https_addr = default('/configurations/hdfs-site/dfs.datanode.https.address', None)
+dfs_http_policy = default('/configurations/hdfs-site/dfs.http.policy', None)
+
 # HDFS High Availability properties
 dfs_ha_enabled = False
 dfs_ha_nameservices = default("/configurations/hdfs-site/dfs.nameservices", None)
@@ -205,8 +210,11 @@ hadoop_env_sh_template = config['configurations']['hadoop-env']['content']
 
 #hadoop-env.sh
 java_home = config['hostLevelParams']['java_home']
+stack_version = str(config['hostLevelParams']['stack_version'])
+
+stack_is_champlain_or_further = not (stack_version.startswith('2.0') or stack_version.startswith('2.1'))
 
-if str(config['hostLevelParams']['stack_version']).startswith('2.0') and System.get_instance().os_family != "suse":
+if stack_version.startswith('2.0') and System.get_instance().os_family != "suse":
   # deprecated rhel jsvc_path
   jsvc_path = "/usr/libexec/bigtop-utils"
 else:
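
Note: stack_is_champlain_or_further treats every stack version that does not start with 2.0 or 2.1 as "Champlain or later", which is what later gates the non-root secure DataNode path. A minimal standalone sketch of that check (the function name is illustrative; the logic mirrors params.py above):

def is_champlain_or_further(stack_version):
  # 2.0.x and 2.1.x are pre-Champlain; everything else is treated as newer
  return not (stack_version.startswith('2.0') or stack_version.startswith('2.1'))

assert not is_champlain_or_further('2.0.6')
assert not is_champlain_or_further('2.1')
assert is_champlain_or_further('2.2')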

+ 87 - 4
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/package/scripts/utils.py

@@ -16,8 +16,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 
 """
+import os
 
 from resource_management import *
+import re
 
 
 def service(action=None, name=None, user=None, create_pid_dir=False,
@@ -30,10 +32,6 @@ def service(action=None, name=None, user=None, create_pid_dir=False,
   check_process = format(
     "ls {pid_file} >/dev/null 2>&1 &&"
     " ps `cat {pid_file}` >/dev/null 2>&1")
-  hadoop_daemon = format(
-    "export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && "
-    "{hadoop_bin}/hadoop-daemon.sh")
-  cmd = format("{hadoop_daemon} --config {hadoop_conf_dir}")
 
   if create_pid_dir:
     Directory(pid_dir,
@@ -44,10 +42,74 @@ def service(action=None, name=None, user=None, create_pid_dir=False,
               owner=user,
               recursive=True)
 
+  hadoop_env_exports = {
+    'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir
+  }
+
   if params.security_enabled and name == "datanode":
+    dfs_dn_port = get_port(params.dfs_dn_addr)
+    dfs_dn_http_port = get_port(params.dfs_dn_http_addr)
+    dfs_dn_https_port = get_port(params.dfs_dn_https_addr)
+
+    # Avoid the case where the datanode cannot be started as a plain (non-root) user because root-owned ports are configured
+    if params.dfs_http_policy == "HTTPS_ONLY":
+      secure_ports_are_in_use = is_secure_port(dfs_dn_port) or is_secure_port(dfs_dn_https_port)
+    elif params.dfs_http_policy == "HTTP_AND_HTTPS":
+      secure_ports_are_in_use = is_secure_port(dfs_dn_port) or is_secure_port(dfs_dn_http_port) or is_secure_port(dfs_dn_https_port)
+    else:   # params.dfs_http_policy == "HTTP_ONLY" or not defined:
+      secure_ports_are_in_use = is_secure_port(dfs_dn_port) or is_secure_port(dfs_dn_http_port)
+
+    # Compute the HADOOP_SECURE_DN_* env vars, but do not export them yet:
+    # these variables must not be set when starting a secure datanode as a non-root user
+    ## On secure datanodes, user to run the datanode as after dropping privileges
+    hadoop_secure_dn_user = params.hdfs_user
+    ## Where log files are stored in the secure data environment.
+    hadoop_secure_dn_log_dir = format("{hdfs_log_dir_prefix}/{hadoop_secure_dn_user}")
+    ## The directory where pid files are stored in the secure data environment.
+    hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hadoop_secure_dn_user}")
+    hadoop_secure_dn_exports = {
+      'HADOOP_SECURE_DN_USER' : hadoop_secure_dn_user,
+      'HADOOP_SECURE_DN_LOG_DIR' : hadoop_secure_dn_log_dir,
+      'HADOOP_SECURE_DN_PID_DIR' : hadoop_secure_dn_pid_dir
+    }
+    hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid")
+
+    # From the Champlain stack onwards, the datanode may be started as non-root even in a secure cluster
+    if not params.stack_is_champlain_or_further or secure_ports_are_in_use:
       user = "root"
       pid_file = format(
         "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid")
+      if params.stack_is_champlain_or_further:
+        hadoop_env_exports.update(hadoop_secure_dn_exports)
+
+    if action == 'stop' and params.stack_is_champlain_or_further and \
+      os.path.isfile(hadoop_secure_dn_pid_file):
+        # Special handling is needed here for the case when a non-root
+        # secure DN is configured and then restarted to pick up the new
+        # configs. Without it, we would not be able to stop the running
+        # instance
+        user = "root"
+        try:
+          with open(hadoop_secure_dn_pid_file, 'r') as f:
+            pid = f.read()
+          os.kill(int(pid), 0)
+          hadoop_env_exports.update(hadoop_secure_dn_exports)
+        except IOError:
+          pass  # Can not open pid file
+        except ValueError:
+          pass  # Pid file content is invalid
+        except OSError:
+          pass  # Process is not running
+
+
+  hadoop_env_exports_str = ''
+  for exp in hadoop_env_exports.items():
+    hadoop_env_exports_str += "export {0}={1} && ".format(exp[0], exp[1])
+
+  hadoop_daemon = format(
+    "{hadoop_env_exports_str}"
+    "{hadoop_bin}/hadoop-daemon.sh")
+  cmd = format("{hadoop_daemon} --config {hadoop_conf_dir}")
 
   daemon_cmd = format("{ulimit_cmd} su - {user} -c '{cmd} {action} {name}'")
 
@@ -64,3 +126,24 @@ def service(action=None, name=None, user=None, create_pid_dir=False,
     File(pid_file,
          action="delete",
     )
+
+def get_port(address):
+  """
+  Extracts the port from an address like 0.0.0.0:1019
+  """
+  if address is None:
+    return None
+  m = re.search(r'(?:http(?:s)?://)?([\w\d.]*):(\d{1,5})', address)
+  if m is not None:
+    return int(m.group(2))
+  else:
+    return None
+
+def is_secure_port(port):
+  """
+  Returns True if the port is root-owned on *nix systems
+  """
+  if port is not None:
+    return port < 1024
+  else:
+    return False
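
The two helpers above drive the root-versus-non-root decision in service(): get_port extracts the port from an address such as 0.0.0.0:1019 (optionally prefixed with http:// or https://), and is_secure_port flags ports below 1024, which only root may bind on *nix. A small self-contained usage sketch of the same logic:

import re

def get_port(address):
  # Extracts the port from an address like 0.0.0.0:1019 or https://host:50475
  if address is None:
    return None
  m = re.search(r'(?:http(?:s)?://)?([\w\d.]*):(\d{1,5})', address)
  return int(m.group(2)) if m else None

def is_secure_port(port):
  # Ports below 1024 are root-owned (privileged) on *nix systems
  return port is not None and port < 1024

assert get_port('0.0.0.0:1019') == 1019
assert get_port('https://dn.example.com:50475') == 50475
assert get_port(None) is None
assert is_secure_port(1019) and not is_secure_port(50010)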

+ 24 - 3
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py

@@ -201,7 +201,7 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
         if siteName in recommendedDefaults:
           siteProperties = getSiteProperties(configurations, siteName)
           if siteProperties is not None:
-            resultItems = method(siteProperties, recommendedDefaults[siteName]["properties"])
+            resultItems = method(siteProperties, recommendedDefaults[siteName]["properties"], configurations)
             items.extend(resultItems)
     return items
 
@@ -259,7 +259,7 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
       return self.getWarnItem("Value is less than the recommended default of -Xmx" + defaultValueXmx)
     return None
 
-  def validateMapReduce2Configurations(self, properties, recommendedDefaults):
+  def validateMapReduce2Configurations(self, properties, recommendedDefaults, configurations):
     validationItems = [ {"config-name": 'mapreduce.map.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'mapreduce.map.java.opts')},
                         {"config-name": 'mapreduce.reduce.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'mapreduce.reduce.java.opts')},
                         {"config-name": 'mapreduce.task.io.sort.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'mapreduce.task.io.sort.mb')},
@@ -269,7 +269,7 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
                         {"config-name": 'yarn.app.mapreduce.am.command-opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'yarn.app.mapreduce.am.command-opts')} ]
     return self.toConfigurationValidationProblems(validationItems, "mapred-site")
 
-  def validateYARNConfigurations(self, properties, recommendedDefaults):
+  def validateYARNConfigurations(self, properties, recommendedDefaults, configurations):
     validationItems = [ {"config-name": 'yarn.nodemanager.resource.memory-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.nodemanager.resource.memory-mb')},
                         {"config-name": 'yarn.scheduler.minimum-allocation-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.minimum-allocation-mb')},
                         {"config-name": 'yarn.scheduler.maximum-allocation-mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'yarn.scheduler.maximum-allocation-mb')} ]
@@ -349,3 +349,24 @@ def formatXmxSizeToBytes(value):
     modifier == 'p': 1024 * 1024 * 1024 * 1024 * 1024
     }[1]
   return to_number(value) * m
+
+def getPort(address):
+  """
+  Extracts the port from an address like 0.0.0.0:1019
+  """
+  if address is None:
+    return None
+  m = re.search(r'(?:http(?:s)?://)?([\w\d.]*):(\d{1,5})', address)
+  if m is not None:
+    return int(m.group(2))
+  else:
+    return None
+
+def isSecurePort(port):
+  """
+  Returns True if the port is root-owned on *nix systems
+  """
+  if port is not None:
+    return port < 1024
+  else:
+    return False

+ 2 - 2
ambari-server/src/main/resources/stacks/HDP/2.1/services/stack_advisor.py

@@ -77,13 +77,13 @@ class HDP21StackAdvisor(HDP206StackAdvisor):
     parentValidators.update(childValidators)
     return parentValidators
 
-  def validateHiveConfigurations(self, properties, recommendedDefaults):
+  def validateHiveConfigurations(self, properties, recommendedDefaults, configurations):
     validationItems = [ {"config-name": 'hive.tez.container.size', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'hive.tez.container.size')},
                         {"config-name": 'hive.tez.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'hive.tez.java.opts')},
                         {"config-name": 'hive.auto.convert.join.noconditionaltask.size', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'hive.auto.convert.join.noconditionaltask.size')} ]
     return self.toConfigurationValidationProblems(validationItems, "hive-site")
 
-  def validateTezConfigurations(self, properties, recommendedDefaults):
+  def validateTezConfigurations(self, properties, recommendedDefaults, configurations):
     validationItems = [ {"config-name": 'tez.am.resource.memory.mb', "item": self.validatorLessThenDefaultValue(properties, recommendedDefaults, 'tez.am.resource.memory.mb')},
                         {"config-name": 'tez.am.java.opts', "item": self.validateXmxValue(properties, recommendedDefaults, 'tez.am.java.opts')} ]
     return self.toConfigurationValidationProblems(validationItems, "tez-site")
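
The signature change above (in both the HDP 2.0.6 and HDP 2.1 advisors) threads the full configurations dictionary into every validator, so a validator for one config type can also read properties of a sibling type, as validateHDFSConfigurations in HDP 2.2 does with core-site. A hedged sketch of that pattern; the validator name is hypothetical and getSiteProperties is re-implemented minimally here for self-containment:

def getSiteProperties(configurations, siteName):
  # Minimal stand-in for the module-level helper used by the stack advisors
  siteConfig = configurations.get(siteName)
  return siteConfig.get("properties") if siteConfig else None

def validateExampleConfigurations(properties, recommendedDefaults, configurations):
  # New three-argument signature: cross-site lookups are now possible
  core_site = getSiteProperties(configurations, "core-site")
  security_enabled = (core_site is not None and
                      core_site.get("hadoop.security.authentication") == "kerberos")
  validationItems = []
  # ... append items that may depend on security_enabled ...
  return validationItems

configurations = {"core-site": {"properties": {"hadoop.security.authentication": "kerberos"}}}
assert validateExampleConfigurations({}, None, configurations) == []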

+ 192 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/HDFS/configuration/hadoop-env.xml

@@ -0,0 +1,192 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+  <property>
+    <name>hdfs_log_dir_prefix</name>
+    <value>/var/log/hadoop</value>
+    <description>Hadoop Log Dir Prefix</description>
+  </property>
+  <property>
+    <name>hadoop_pid_dir_prefix</name>
+    <value>/var/run/hadoop</value>
+    <description>Hadoop PID Dir Prefix</description>
+  </property>
+  <property>
+    <name>hadoop_root_logger</name>
+    <value>INFO,RFA</value>
+    <description>Hadoop Root Logger</description>
+  </property>
+  <property>
+    <name>hadoop_heapsize</name>
+    <value>1024</value>
+    <description>Hadoop maximum Java heap size</description>
+  </property>
+  <property>
+    <name>namenode_heapsize</name>
+    <value>1024</value>
+    <description>NameNode Java heap size</description>
+  </property>
+  <property>
+    <name>namenode_opt_newsize</name>
+    <value>200</value>
+    <description>NameNode new generation size</description>
+  </property>
+  <property>
+    <name>namenode_opt_maxnewsize</name>
+    <value>200</value>
+    <description>NameNode maximum new generation size</description>
+  </property>
+  <property>
+    <name>dtnode_heapsize</name>
+    <value>1024</value>
+    <description>DataNode maximum Java heap size</description>
+  </property>
+  <property>
+    <name>proxyuser_group</name>
+    <value>users</value>
+    <property-type>GROUP</property-type>
+    <description>Proxy user group.</description>
+  </property>
+  <property>
+    <name>hdfs_user</name>
+    <value>hdfs</value>
+    <property-type>USER</property-type>
+    <description>User to run HDFS as</description>
+  </property>
+  
+  <!-- hadoop-env.sh -->
+  <property>
+    <name>content</name>
+    <description>This is the jinja template for hadoop-env.sh file</description>
+    <value>
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+export JAVA_HOME={{java_home}}
+export HADOOP_HOME_WARN_SUPPRESS=1
+
+# Hadoop home directory
+export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}
+
+# Hadoop Configuration Directory
+#TODO: if env var set that can cause problems
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-{{hadoop_conf_dir}}}
+
+{# this is different for HDP1 #}
+# Path to jsvc required by secure HDP 2.0 datanode
+export JSVC_HOME={{jsvc_path}}
+
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+export HADOOP_HEAPSIZE="{{hadoop_heapsize}}"
+
+export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms{{namenode_heapsize}}"
+
+# Extra Java runtime options.  Empty by default.
+export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}"
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_NAMENODE_OPTS}"
+HADOOP_JOBTRACKER_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{jtnode_opt_newsize}} -XX:MaxNewSize={{jtnode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xmx{{jtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dhadoop.mapreduce.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}"
+
+HADOOP_TASKTRACKER_OPTS="-server -Xmx{{ttnode_heapsize}} -Dhadoop.security.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}"
+HADOOP_DATANODE_OPTS="-Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=ERROR,DRFAS ${HADOOP_DATANODE_OPTS}"
+HADOOP_BALANCER_OPTS="-server -Xmx{{hadoop_heapsize}}m ${HADOOP_BALANCER_OPTS}"
+
+export HADOOP_SECONDARYNAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps ${HADOOP_NAMENODE_INIT_HEAPSIZE} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_SECONDARYNAMENODE_OPTS}"
+
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m $HADOOP_CLIENT_OPTS"
+
+# Extra ssh options.  Empty by default.
+export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR"
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+export HADOOP_LOG_DIR={{hdfs_log_dir_prefix}}/$USER
+
+# History server logs
+export HADOOP_MAPRED_LOG_DIR={{mapred_log_dir_prefix}}/$USER
+
+# File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from.  Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+export HADOOP_PID_DIR={{hadoop_pid_dir_prefix}}/$USER
+
+# History server pid
+export HADOOP_MAPRED_PID_DIR={{mapred_pid_dir_prefix}}/$USER
+
+YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
+
+# A string representing this instance of hadoop. $USER by default.
+export HADOOP_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+
+# export HADOOP_NICENESS=10
+
+# Use libraries from standard classpath
+JAVA_JDBC_LIBS=""
+#Add libraries required by mysql connector
+for jarFile in `ls /usr/share/java/*mysql* 2>/dev/null`
+do
+  JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile
+done
+#Add libraries required by oracle connector
+for jarFile in `ls /usr/share/java/*ojdbc* 2>/dev/null`
+do
+  JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile
+done
+#Add libraries required by nodemanager
+MAPREDUCE_LIBS={{mapreduce_libs_path}}
+export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}${JAVA_JDBC_LIBS}:${MAPREDUCE_LIBS}
+
+if [ -d "/usr/lib/tez" ]; then
+  export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/lib/tez/*:/usr/lib/tez/lib/*:/etc/tez/conf
+fi
+
+# Setting path to hdfs command line
+export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}}
+
+#Mostly required for hadoop 2.0
+export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:/usr/lib/hadoop/lib/native/Linux-amd64-64
+
+#Hadoop logging options
+export HADOOP_ROOT_LOGGER={{hadoop_root_logger}}
+    </value>
+  </property>
+  
+</configuration>

+ 151 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py

@@ -0,0 +1,151 @@
+#!/usr/bin/env ambari-python-wrap
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+class HDP22StackAdvisor(HDP21StackAdvisor):
+
+  def getServiceConfigurationRecommenderDict(self):
+    parentRecommendConfDict = super(HDP22StackAdvisor, self).getServiceConfigurationRecommenderDict()
+    childRecommendConfDict = {
+      "HDFS": self.recommendHDFSConfigurations,
+    }
+    parentRecommendConfDict.update(childRecommendConfDict)
+    return parentRecommendConfDict
+
+
+  def recommendHDFSConfigurations(self, configurations, clusterData):
+    self.putProperty(configurations, "hdfs-site")
+
+  def getServiceConfigurationValidators(self):
+    parentValidators = super(HDP22StackAdvisor, self).getServiceConfigurationValidators()
+    childValidators = {
+      "HDFS": ["hdfs-site", self.validateHDFSConfigurations],
+    }
+    parentValidators.update(childValidators)
+    return parentValidators
+
+
+  def validateHDFSConfigurations(self, properties, recommendedDefaults, configurations):
+    # We cannot access the property hadoop.security.authentication from the
+    # other config (core-site). That's why we use a different heuristic here
+    hdfs_site = properties
+    core_site = getSiteProperties(configurations, "core-site")
+
+    dfs_encrypt_data_transfer = 'dfs.encrypt.data.transfer'  # Hadoop Wire encryption
+    try:
+      wire_encryption_enabled = hdfs_site[dfs_encrypt_data_transfer] == "true"
+    except KeyError:
+      wire_encryption_enabled = False
+
+    HTTP_ONLY = 'HTTP_ONLY'
+    HTTPS_ONLY = 'HTTPS_ONLY'
+    HTTP_AND_HTTPS = 'HTTP_AND_HTTPS'
+
+    VALID_HTTP_POLICY_VALUES = [HTTP_ONLY, HTTPS_ONLY, HTTP_AND_HTTPS]
+    VALID_TRANSFER_PROTECTION_VALUES = ['authentication', 'integrity', 'privacy']
+
+    validationItems = []
+    if (not wire_encryption_enabled and   # If wire encryption is enabled in Hadoop, it disables all of these checks
+          core_site['hadoop.security.authentication'] == 'kerberos' and
+          core_site['hadoop.security.authorization'] == 'true'):
+      # security is enabled
+
+      dfs_http_policy = 'dfs.http.policy'
+      dfs_datanode_address = 'dfs.datanode.address'
+      datanode_http_address = 'dfs.datanode.http.address'
+      datanode_https_address = 'dfs.datanode.https.address'
+      data_transfer_protection = 'dfs.data.transfer.protection'
+
+      try: # Params may be absent
+        privileged_dfs_dn_port = isSecurePort(getPort(hdfs_site[dfs_datanode_address]))
+      except KeyError:
+        privileged_dfs_dn_port = False
+      try:
+        privileged_dfs_http_port = isSecurePort(getPort(hdfs_site[datanode_http_address]))
+      except KeyError:
+        privileged_dfs_http_port = False
+      try:
+        privileged_dfs_https_port = isSecurePort(getPort(hdfs_site[datanode_https_address]))
+      except KeyError:
+        privileged_dfs_https_port = False
+      try:
+        dfs_http_policy_value = hdfs_site[dfs_http_policy]
+      except KeyError:
+        dfs_http_policy_value = HTTP_ONLY  # Default
+      try:
+        data_transfer_protection_value = hdfs_site[data_transfer_protection]
+      except KeyError:
+        data_transfer_protection_value = None
+
+      if dfs_http_policy_value not in VALID_HTTP_POLICY_VALUES:
+        validationItems.append({"config-name": dfs_http_policy,
+                                "item": self.getWarnItem(
+                                  "Invalid property value: {0}. Valid values are {1}".format(
+                                    dfs_http_policy_value, VALID_HTTP_POLICY_VALUES))})
+
+      # determine whether we use secure ports
+      address_properties_with_warnings = []
+      if dfs_http_policy_value == HTTPS_ONLY:
+        any_privileged_ports_are_in_use = privileged_dfs_dn_port or privileged_dfs_https_port
+        if any_privileged_ports_are_in_use:
+          important_properties = [dfs_datanode_address, datanode_https_address]
+          message = "You set up datanode to use some non-secure ports, but {0} is set to {1}. " \
+                    "If you want to run Datanode under non-root user in a secure cluster, " \
+                    "you should set all these properties {2} " \
+                    "to use non-secure ports (if property {3} does not exist, " \
+                    "just add it). You may also set up property {4} ('{5}' is a good default value). " \
+                    "Also, set up WebHDFS with SSL as " \
+                    "described in manual in order to be able to " \
+                    "use HTTPS.".format(dfs_http_policy, dfs_http_policy_value, important_properties,
+                                        datanode_https_address, data_transfer_protection,
+                                        VALID_TRANSFER_PROTECTION_VALUES[0])
+          address_properties_with_warnings.extend(important_properties)
+      else:  # dfs_http_policy_value == HTTP_AND_HTTPS or HTTP_ONLY
+        # We don't enforce datanode_https_address to use privileged ports here
+        any_nonprivileged_ports_are_in_use = not privileged_dfs_dn_port or not privileged_dfs_http_port
+        if any_nonprivileged_ports_are_in_use:
+          important_properties = [dfs_datanode_address, datanode_http_address]
+          message = "You have set up datanode to use some non-secure ports, but {0} is set to {1}. " \
+                    "In a secure cluster, Datanode forbids using non-secure ports " \
+                    "if {0} is not set to {3}. " \
+                    "Please make sure that properties {2} use secure ports.".format(
+                      dfs_http_policy, dfs_http_policy_value, important_properties, HTTPS_ONLY)
+          address_properties_with_warnings.extend(important_properties)
+
+      # Generate port-related warnings if any
+      for prop in address_properties_with_warnings:
+        validationItems.append({"config-name": prop,
+                                "item": self.getWarnItem(message)})
+
+      # Check if it is appropriate to use dfs.data.transfer.protection
+      if data_transfer_protection_value is not None:
+        if dfs_http_policy_value in [HTTP_ONLY, HTTP_AND_HTTPS]:
+          validationItems.append({"config-name": data_transfer_protection,
+                                  "item": self.getWarnItem(
+                                    "{0} property can not be used when {1} is set to any "
+                                    "value other than {2}. Tip: When {1} property is not defined, it defaults to {3}".format(
+                                    data_transfer_protection, dfs_http_policy, HTTPS_ONLY, HTTP_ONLY))})
+        elif not data_transfer_protection_value in VALID_TRANSFER_PROTECTION_VALUES:
+          validationItems.append({"config-name": data_transfer_protection,
+                                  "item": self.getWarnItem(
+                                    "Invalid property value: {0}. Valid values are {1}.".format(
+                                      data_transfer_protection_value, VALID_TRANSFER_PROTECTION_VALUES))})
+    return self.toConfigurationValidationProblems(validationItems, "hdfs-site")
+
+
+
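
The validator above mirrors the runtime branching in utils.py: under HTTPS_ONLY only the datanode and HTTPS addresses are consulted, under HTTP_AND_HTTPS all three are, and otherwise (HTTP_ONLY or unset) the datanode and HTTP addresses are; any privileged (<1024) port forces the datanode to keep starting as root. A compact standalone sketch of that decision, with helper names assumed rather than imported:

def is_secure_port(port):
  return port is not None and port < 1024

def secure_ports_in_use(dfs_http_policy, dn_port, http_port, https_port):
  # Which addresses matter depends on dfs.http.policy
  if dfs_http_policy == "HTTPS_ONLY":
    return is_secure_port(dn_port) or is_secure_port(https_port)
  if dfs_http_policy == "HTTP_AND_HTTPS":
    return is_secure_port(dn_port) or is_secure_port(http_port) or is_secure_port(https_port)
  # HTTP_ONLY or not defined
  return is_secure_port(dn_port) or is_secure_port(http_port)

# Privileged port under HTTPS_ONLY: the datanode must still be started as root
assert secure_ports_in_use("HTTPS_ONLY", 1019, None, 50475)
# Unprivileged ports under HTTPS_ONLY: a non-root start becomes possible
assert not secure_ports_in_use("HTTPS_ONLY", 10000, None, 50000)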

+ 136 - 0
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py

@@ -18,6 +18,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 '''
 from ambari_commons import OSCheck
+import json
 from mock.mock import MagicMock, patch
 from stacks.utils.RMFTestCase import *
 
@@ -56,6 +57,7 @@ class TestDatanode(RMFTestCase):
                               )
     self.assertNoMoreResources()
 
+  @patch("os.path.exists", new = MagicMock(return_value=False))
   def test_stop_default(self):
     self.executeScript("2.0.6/services/HDFS/package/scripts/datanode.py",
                        classname = "DataNode",
@@ -115,6 +117,70 @@ class TestDatanode(RMFTestCase):
                               )
     self.assertNoMoreResources()
 
+  def test_start_secured_HDP22_root(self):
+    config_file = "stacks/2.0.6/configs/secured.json"
+    with open(config_file, "r") as f:
+      secured_json = json.load(f)
+
+    secured_json['hostLevelParams']['stack_version']= '2.2'
+
+    self.executeScript("2.0.6/services/HDFS/package/scripts/datanode.py",
+                       classname = "DataNode",
+                       command = "start",
+                       config_dict = secured_json
+    )
+    self.assert_configure_secured()
+    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
+                              action = ['delete'],
+                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid >/dev/null 2>&1 && ps `cat /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid` >/dev/null 2>&1',
+                              )
+    self.assertResourceCalled('Execute', 'ulimit -c unlimited;  su - root -c \'export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop/hdfs && export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop/hdfs && export HADOOP_SECURE_DN_USER=hdfs && export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec && /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start datanode\'',
+                              not_if = 'ls /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid >/dev/null 2>&1 && ps `cat /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid` >/dev/null 2>&1',
+                              )
+    self.assertNoMoreResources()
+
+  def test_start_secured_HDP22_non_root_https_only(self):
+    config_file="stacks/2.0.6/configs/secured.json"
+    with open(config_file, "r") as f:
+      secured_json = json.load(f)
+
+    secured_json['hostLevelParams']['stack_version']= '2.2'
+    secured_json['configurations']['hdfs-site']['dfs.http.policy']= 'HTTPS_ONLY'
+    secured_json['configurations']['hdfs-site']['dfs.datanode.address']= '0.0.0.0:10000'
+    secured_json['configurations']['hdfs-site']['dfs.datanode.https.address']= '0.0.0.0:50000'
+
+    self.executeScript("2.0.6/services/HDFS/package/scripts/datanode.py",
+                       classname = "DataNode",
+                       command = "start",
+                       config_dict = secured_json
+    )
+    self.assert_configure_secured()
+    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
+                              action = ['delete'],
+                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid >/dev/null 2>&1 && ps `cat /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid` >/dev/null 2>&1',
+                              )
+    self.assertResourceCalled('Execute', 'ulimit -c unlimited;  su - hdfs -c \'export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec && /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start datanode\'',
+                              not_if = 'ls /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid >/dev/null 2>&1 && ps `cat /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid` >/dev/null 2>&1',
+                              )
+    self.assertNoMoreResources()
+
+  @patch("os.path.exists", new = MagicMock(return_value=False))
   def test_stop_secured(self):
     self.executeScript("2.0.6/services/HDFS/package/scripts/datanode.py",
                        classname = "DataNode",
@@ -141,6 +207,76 @@ class TestDatanode(RMFTestCase):
                               )
     self.assertNoMoreResources()
 
+
+  @patch("os.path.exists", new = MagicMock(return_value=False))
+  def test_stop_secured_HDP22_root(self):
+    config_file = "stacks/2.0.6/configs/secured.json"
+    with open(config_file, "r") as f:
+      secured_json = json.load(f)
+
+    secured_json['hostLevelParams']['stack_version']= '2.2'
+
+    self.executeScript("2.0.6/services/HDFS/package/scripts/datanode.py",
+                       classname = "DataNode",
+                       command = "stop",
+                       config_dict = secured_json
+    )
+    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
+                              action = ['delete'],
+                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid >/dev/null 2>&1 && ps `cat /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid` >/dev/null 2>&1',
+                              )
+    self.assertResourceCalled('Execute', 'ulimit -c unlimited;  su - root -c \'export HADOOP_SECURE_DN_PID_DIR=/var/run/hadoop/hdfs && export HADOOP_SECURE_DN_LOG_DIR=/var/log/hadoop/hdfs && export HADOOP_SECURE_DN_USER=hdfs && export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec && /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop datanode\'',
+                              not_if = None,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
+                              action = ['delete'],
+                              )
+    self.assertNoMoreResources()
+
+  @patch("os.path.exists", new = MagicMock(return_value=False))
+  def test_stop_secured_HDP22_non_root_https_only(self):
+    config_file = "stacks/2.0.6/configs/secured.json"
+    with open(config_file, "r") as f:
+      secured_json = json.load(f)
+
+    secured_json['hostLevelParams']['stack_version']= '2.2'
+    secured_json['configurations']['hdfs-site']['dfs.http.policy']= 'HTTPS_ONLY'
+    secured_json['configurations']['hdfs-site']['dfs.datanode.address']= '0.0.0.0:10000'
+    secured_json['configurations']['hdfs-site']['dfs.datanode.https.address']= '0.0.0.0:50000'
+
+    self.executeScript("2.0.6/services/HDFS/package/scripts/datanode.py",
+                       classname = "DataNode",
+                       command = "stop",
+                       config_dict = secured_json
+    )
+    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
+                              owner = 'hdfs',
+                              recursive = True,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
+                              action = ['delete'],
+                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid >/dev/null 2>&1 && ps `cat /var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid` >/dev/null 2>&1',
+                              )
+    self.assertResourceCalled('Execute', 'ulimit -c unlimited;  su - hdfs -c \'export HADOOP_LIBEXEC_DIR=/usr/lib/hadoop/libexec && /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop datanode\'',
+                              not_if=None,
+                              )
+    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-datanode.pid',
+                              action=['delete'],
+                              )
+    self.assertNoMoreResources()
+
   def assert_configure_default(self):
     self.assertResourceCalled('File', '/etc/security/limits.d/hdfs.conf',
                               content = Template('hdfs.conf.j2'),

+ 311 - 0
ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py

@@ -0,0 +1,311 @@
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import json
+import os
+from unittest import TestCase
+
+class TestHDP22StackAdvisor(TestCase):
+
+  def setUp(self):
+    import imp
+
+    self.testDirectory = os.path.dirname(os.path.abspath(__file__))
+    stackAdvisorPath = os.path.join(self.testDirectory, '../../../../../main/resources/stacks/stack_advisor.py')
+    hdp206StackAdvisorPath = os.path.join(self.testDirectory, '../../../../../main/resources/stacks/HDP/2.0.6/services/stack_advisor.py')
+    hdp21StackAdvisorPath = os.path.join(self.testDirectory, '../../../../../main/resources/stacks/HDP/2.1/services/stack_advisor.py')
+    hdp22StackAdvisorPath = os.path.join(self.testDirectory, '../../../../../main/resources/stacks/HDP/2.2/services/stack_advisor.py')
+    hdp22StackAdvisorClassName = 'HDP22StackAdvisor'
+    with open(stackAdvisorPath, 'rb') as fp:
+      imp.load_module('stack_advisor', fp, stackAdvisorPath, ('.py', 'rb', imp.PY_SOURCE))
+    with open(hdp206StackAdvisorPath, 'rb') as fp:
+      imp.load_module('stack_advisor_impl', fp, hdp206StackAdvisorPath, ('.py', 'rb', imp.PY_SOURCE))
+    with open(hdp21StackAdvisorPath, 'rb') as fp:
+      imp.load_module('stack_advisor_impl', fp, hdp21StackAdvisorPath, ('.py', 'rb', imp.PY_SOURCE))
+    with open(hdp22StackAdvisorPath, 'rb') as fp:
+      stack_advisor_impl = imp.load_module('stack_advisor_impl', fp, hdp22StackAdvisorPath, ('.py', 'rb', imp.PY_SOURCE))
+    clazz = getattr(stack_advisor_impl, hdp22StackAdvisorClassName)
+    self.stackAdvisor = clazz()
+
+
+  def test_validateHDFSConfigurations(self):
+    self.maxDiff = None
+    recommendedDefaults = None
+
+    unsecure_cluster_core_site = {
+      'hadoop.security.authentication': 'simple',
+      'hadoop.security.authorization': 'false',
+    }
+
+    secure_cluster_core_site = {
+      'hadoop.security.authentication': 'kerberos',
+      'hadoop.security.authorization': 'true',
+    }
+
+    # TEST CASE: Unsecured cluster, secure ports
+    properties = {  # hdfs-site
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:1022',
+    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': unsecure_cluster_core_site
+      }
+    }
+    expected = []  # No warnings
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Unsecured cluster, unsecure ports
+    properties = {  # hdfs-site
+                    'dfs.datanode.address': '0.0.0.0:55555',
+                    'dfs.datanode.http.address': '0.0.0.0:55555',
+                    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+        },
+      'core-site': {
+        'properties': unsecure_cluster_core_site
+      }
+    }
+    expected = []  # No warnings
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, invalid dfs.http.policy value
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'WRONG_VALUE',
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:1022',
+    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = [{'config-name': 'dfs.http.policy',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "Invalid property value: WRONG_VALUE. Valid values are ['HTTP_ONLY', 'HTTPS_ONLY', 'HTTP_AND_HTTPS']",
+                 'type': 'configuration'}]
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, dfs.http.policy=HTTPS_ONLY, secure ports
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'HTTPS_ONLY',
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.https.address': '0.0.0.0:50475',
+    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = [{'config-name': 'dfs.datanode.address',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "You set up datanode to use some non-secure ports, "
+                            "but dfs.http.policy is set to HTTPS_ONLY. If you "
+                            "want to run Datanode under non-root user in a secure"
+                            " cluster, you should set all these properties ['dfs.datanode.address', 'dfs.datanode.https.address'] "
+                            "to use non-secure ports (if property dfs.datanode.https.address does not exist, just add it)."
+                            " You may also set up property dfs.data.transfer.protection ('authentication' is a good default value). "
+                            "Also, set up WebHDFS with SSL as described in manual in order to be able to use HTTPS.",
+                 'type': 'configuration'},
+                {'config-name': 'dfs.datanode.https.address',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "You set up datanode to use some non-secure ports, "
+                            "but dfs.http.policy is set to HTTPS_ONLY. If you "
+                            "want to run Datanode under non-root user in a secure"
+                            " cluster, you should set all these properties ['dfs.datanode.address', 'dfs.datanode.https.address'] "
+                            "to use non-secure ports (if property dfs.datanode.https.address does not exist, just add it)."
+                            " You may also set up property dfs.data.transfer.protection ('authentication' is a good default value). "
+                            "Also, set up WebHDFS with SSL as described in manual in order to be able to use HTTPS.",
+                 'type': 'configuration'}
+                ]
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+
+    # TEST CASE: Secure cluster, dfs.http.policy=HTTPS_ONLY, valid configuration
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'HTTPS_ONLY',
+                    'dfs.datanode.address': '0.0.0.0:50010',
+                    'dfs.datanode.https.address': '0.0.0.0:50475',
+                    'dfs.data.transfer.protection': 'authentication',
+                    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = []
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, dfs.http.policy=HTTP_ONLY, insecure ports
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'HTTP_ONLY',
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:50475',
+                    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = [{'config-name': 'dfs.datanode.address',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "You have set up datanode to use some non-secure ports, "
+                            "but dfs.http.policy is set to HTTP_ONLY. In a secure cluster, "
+                            "Datanode forbids using non-secure ports if dfs.http.policy is not "
+                            "set to HTTPS_ONLY. Please make sure that properties "
+                            "['dfs.datanode.address', 'dfs.datanode.http.address'] use secure ports.",
+                 'type': 'configuration'},
+                {'config-name': 'dfs.datanode.http.address',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "You have set up datanode to use some non-secure ports, "
+                            "but dfs.http.policy is set to HTTP_ONLY. In a secure cluster, "
+                            "Datanode forbids using non-secure ports if dfs.http.policy is not "
+                            "set to HTTPS_ONLY. Please make sure that properties "
+                            "['dfs.datanode.address', 'dfs.datanode.http.address'] use secure ports.",
+                 'type': 'configuration'}
+                ]
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, dfs.http.policy=HTTP_ONLY, valid configuration
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'HTTP_ONLY',
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:1022',
+                    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+        },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = []
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, absent dfs.http.policy (typical situation)
+    properties = {  # hdfs-site
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:1022',
+                    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+        },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = []
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, dfs.http.policy=HTTP_ONLY, misusage of dfs.data.transfer.protection warning
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'HTTP_ONLY',
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:1022',
+                    'dfs.data.transfer.protection': 'authentication',
+    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+        },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = [{'config-name': 'dfs.data.transfer.protection',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "dfs.data.transfer.protection property can not be used when dfs.http.policy is "
+                            "set to any value other than HTTPS_ONLY. Tip: When dfs.http.policy property is not defined, it defaults to HTTP_ONLY",
+                 'type': 'configuration'}]
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Secure cluster, dfs.http.policy=HTTPS_ONLY, wrong dfs.data.transfer.protection value
+    properties = {  # hdfs-site
+                    'dfs.http.policy': 'HTTPS_ONLY',
+                    'dfs.datanode.address': '0.0.0.0:50010',
+                    'dfs.datanode.https.address': '0.0.0.0:50475',
+                    'dfs.data.transfer.protection': 'WRONG_VALUE',
+                    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = [{'config-name': 'dfs.data.transfer.protection',
+                 'config-type': 'hdfs-site',
+                 'level': 'WARN',
+                 'message': "Invalid property value: WRONG_VALUE. Valid values are ['authentication', 'integrity', 'privacy'].",
+                 'type': 'configuration'}]
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)
+
+    # TEST CASE: Hadoop wire encryption enabled
+
+    properties = {  # hdfs-site
+                    'dfs.encrypt.data.transfer': 'true',  # Wire encryption
+                    'dfs.datanode.address': '0.0.0.0:1019',
+                    'dfs.datanode.http.address': '0.0.0.0:1022',
+    }
+    configurations = {
+      'hdfs-site': {
+        'properties': properties,
+      },
+      'core-site': {
+        'properties': secure_cluster_core_site
+      }
+    }
+    expected = []  # No warnings
+    validation_problems = self.stackAdvisor.validateHDFSConfigurations(properties, recommendedDefaults, configurations)
+    self.assertEquals(validation_problems, expected)

+ 12 - 7
ambari-server/src/test/python/stacks/utils/RMFTestCase.py

@@ -40,6 +40,7 @@ PATH_TO_STACK_TESTS = os.path.normpath("test/python/stacks/")
 
 class RMFTestCase(TestCase):
   def executeScript(self, path, classname=None, command=None, config_file=None,
+                    config_dict=None,
                     # common mocks for all the scripts
                     config_overrides = None,
                     shell_mock_value = (0, "OK."), 
@@ -52,13 +53,17 @@ class RMFTestCase(TestCase):
     stacks_path = os.path.join(src_dir, PATH_TO_STACKS)
     configs_path = os.path.join(src_dir, PATH_TO_STACK_TESTS, stack_version, "configs")
     script_path = os.path.join(stacks_path, norm_path)
-    config_file_path = os.path.join(configs_path, config_file)
-
-    try:
-      with open(config_file_path, "r") as f:
-        self.config_dict = json.load(f)
-    except IOError:
-      raise RuntimeError("Can not read config file: "+ config_file_path)
+    if config_file is not None and config_dict is None:
+      config_file_path = os.path.join(configs_path, config_file)
+      try:
+        with open(config_file_path, "r") as f:
+          self.config_dict = json.load(f)
+      except IOError:
+        raise RuntimeError("Can not read config file: "+ config_file_path)
+    elif config_dict is not None and config_file is None:
+      self.config_dict = config_dict
+    else:
+      raise RuntimeError("Please specify either config_file or config_dict parameter")
 
     if config_overrides:
       for key, value in config_overrides.iteritems():
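
With this change, executeScript accepts either a config_file to load from disk or an already-built config_dict (as the new HDP 2.2 datanode tests use), but not both and not neither. A small sketch of the resulting contract; the helper name is illustrative:

import json

def resolve_config(config_file=None, config_dict=None):
  # Mirrors the updated executeScript behaviour: exactly one config source is allowed
  if config_file is not None and config_dict is None:
    with open(config_file, "r") as f:
      return json.load(f)
  elif config_dict is not None and config_file is None:
    return config_dict
  else:
    raise RuntimeError("Please specify either config_file or config_dict parameter")

assert resolve_config(config_dict={'hostLevelParams': {'stack_version': '2.2'}})['hostLevelParams']['stack_version'] == '2.2'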

+ 1 - 1
ambari-web/app/messages.js

@@ -656,7 +656,7 @@ Em.I18n.translations = {
   'installer.step7.popup.database.connection.body': 'You have not run or passed the database connection test for: {0}. It is highly recommended that you pass the connection test before proceeding to prevent failures during deployment.',
   'installer.step7.popup.validation.failed.header': 'Validation failed.',
   'installer.step7.popup.validation.failed.body': 'Some services are not properly configured. You have to change the highlighted configs according to the recommended values.',
-  'installer.step7.popup.validation.request.failed.body': 'Config validaition failed.',
+  'installer.step7.popup.validation.request.failed.body': 'Config validation failed.',
   'installer.step7.popup.validation.warning.header': 'Configurations',
   'installer.step7.popup.validation.warning.body': 'Some service configurations are not configured properly. We recommend you review and change the highlighted configuration values. Are you sure you want to proceed without correcting configurations?',
   'installer.step7.oozie.database.new': 'New Derby Database',