Browse Source

AMBARI-10280. Need to check if rpcbind or portmap is started before starting NFS Gateway (Brandon Li via alejandro)

Alejandro Fernandez 10 years ago
parent
commit
d331e85329

+ 39 - 3
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_nfsgateway.py

@@ -17,26 +17,62 @@ limitations under the License.
 
 """
 
+from resource_management.core.exceptions import Fail
+from resource_management.core.logger import Logger
 from resource_management.core.resources import Directory
+from resource_management.core import shell
 from utils import service
 from utils import hdfs_directory
+import subprocess,os
+
+# NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
+# on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
+
+def prepare_rpcbind():
+  Logger.info("check if native nfs server is running")
+  p, output = shell.call("pgrep nfsd")
+  if p == 0 :
+    Logger.info("native nfs server is running. shutting it down...")
+    # shutdown nfs
+    shell.call("service nfs stop")
+    shell.call("service nfs-kernel-server stop")
+    Logger.info("check if the native nfs server is down...")
+    p, output = shell.call("pgrep nfsd")
+    if p == 0 :
+      raise Fail("Failed to shutdown native nfs service")
+
+  Logger.info("check if rpcbind or portmap is running")
+  p, output = shell.call("pgrep rpcbind")
+  q, output = shell.call("pgrep portmap")
+
+  if p!=0 and q!=0 :
+    Logger.info("no portmap or rpcbind running. starting one...")
+    p, output = shell.call("service rpcbind start")
+    q, output = shell.call("service portmap start")
+    if p!=0 and q!=0 :
+      raise Fail("Failed to start rpcbind or portmap")
+
+  Logger.info("now we are ready to start nfs gateway")
 
 
 def nfsgateway(action=None, format=False):
   import params
 
+  if action== "start":
+    prepare_rpcbind()
+
   if action == "configure":
     return
   elif action == "start" or action == "stop":
     Directory(params.hadoop_pid_dir_prefix,
               mode=0755,
-              owner=params.hdfs_user,
-              group=params.user_group
+              owner=params.root_user,
+              group=params.root_group
     )
     service(
       action=action,
       name="nfs3",
-      user=params.hdfs_user,
+      user=params.root_user,
       create_pid_dir=True,
       create_log_dir=True
     )

+ 2 - 0
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/params.py

@@ -41,6 +41,7 @@ version = default("/commandParams/version", None)
 
 security_enabled = config['configurations']['cluster-env']['security_enabled']
 hdfs_user = status_params.hdfs_user
+root_user = "root"
 hadoop_pid_dir_prefix = status_params.hadoop_pid_dir_prefix
 
 # Some datanode settings
@@ -157,6 +158,7 @@ mapred_user = config['configurations']['mapred-env']['mapred_user']
 hdfs_principal_name = default('/configurations/hadoop-env/hdfs_principal_name', None)
 
 user_group = config['configurations']['cluster-env']['user_group']
+root_group = "root"
 proxyuser_group =  config['configurations']['hadoop-env']['proxyuser_group']
 
 #hadoop params

+ 1 - 1
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/status_params.py

@@ -29,7 +29,7 @@ namenode_pid_file = format("{hdp_pid_dir}/hadoop-{hdfs_user}-namenode.pid")
 snamenode_pid_file = format("{hdp_pid_dir}/hadoop-{hdfs_user}-secondarynamenode.pid")
 journalnode_pid_file = format("{hdp_pid_dir}/hadoop-{hdfs_user}-journalnode.pid")
 zkfc_pid_file = format("{hdp_pid_dir}/hadoop-{hdfs_user}-zkfc.pid")
-nfsgateway_pid_file = format("{hdp_pid_dir}/hadoop-{hdfs_user}-nfs3.pid")
+nfsgateway_pid_file = format("{hadoop_pid_dir_prefix}/root/hadoop_privileged_nfs3.pid")
 
 # Security related/required params
 hostname = config['hostname']

+ 15 - 4
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/utils.py

@@ -147,6 +147,21 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False,
   options = options if options else ""
   pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
   pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid")
+  hadoop_env_exports = {
+    'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir
+  }
+  # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
+  # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
+  if name == "nfs3" :
+    pid_file = format(
+      "{hadoop_pid_dir_prefix}/root/hadoop_privileged_nfs3.pid")
+
+    print pid_file
+    custom_export = {
+      'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user
+    }
+    hadoop_env_exports.update(custom_export)
+
   log_dir = format("{hdfs_log_dir_prefix}/{user}")
   check_process = format(
     "ls {pid_file} >/dev/null 2>&1 &&"
@@ -161,10 +176,6 @@ def service(action=None, name=None, user=None, options="", create_pid_dir=False,
               owner=user,
               recursive=True)
 
-  hadoop_env_exports = {
-    'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir
-  }
-
   if params.security_enabled and name == "datanode":
     ## The directory where pid files are stored in the secure data environment.
     hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")

+ 52 - 47
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_nfsgateway.py

@@ -21,7 +21,8 @@ import os
 from stacks.utils.RMFTestCase import *
 from mock.mock import MagicMock, patch
 
-
+# NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
+# on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
 class TestNFSGateway(RMFTestCase):
   COMMON_SERVICES_PACKAGE_DIR = "HDFS/2.1.0.2.0/package"
   STACK_VERSION = "2.0.6"
@@ -38,7 +39,9 @@ class TestNFSGateway(RMFTestCase):
     self.assert_configure_default()
     self.assertNoMoreResources()
 
-  def test_start_default(self):
+  @patch("hdfs_nfsgateway.prepare_rpcbind")
+  def test_start_default(self, prepare_rpcbind_mock):
+    prepare_rpcbind_mock.returnvalue = 0
     self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/nfsgateway.py",
                        classname = "NFSGateway",
                        command = "start",
@@ -48,25 +51,25 @@ class TestNFSGateway(RMFTestCase):
     )
     self.assert_configure_default()
     self.assertResourceCalled('Directory', '/var/run/hadoop',
-                              owner = 'hdfs',
-                              group = 'hadoop',
+                              owner = 'root',
+                              group = 'root',
                               mode = 0755
                               )
-    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/run/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/log/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid',
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
                               action = ['delete'],
-                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid` >/dev/null 2>&1',
+                              not_if='ls /var/run/hadoop/root/hadoop_privileged_nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/root/hadoop_privileged_nfs3.pid` >/dev/null 2>&1',
                               )
-    self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start nfs3'",
-        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = 'ls /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid` >/dev/null 2>&1',
+    self.assertResourceCalled('Execute', "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start nfs3",
+        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec', 'HADOOP_PRIVILEGED_NFS_USER': 'hdfs'},
+        not_if = 'ls /var/run/hadoop/root/hadoop_privileged_nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/root/hadoop_privileged_nfs3.pid` >/dev/null 2>&1',
     )
     self.assertNoMoreResources()
 
@@ -79,27 +82,27 @@ class TestNFSGateway(RMFTestCase):
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
     self.assertResourceCalled('Directory', '/var/run/hadoop',
-                              owner = 'hdfs',
-                              group = 'hadoop',
+                              owner = 'root',
+                              group = 'root',
                               mode = 0755
                               )
-    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/run/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/log/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid',
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
                               action = ['delete'],
-                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid` >/dev/null 2>&1',
+                              not_if='ls /var/run/hadoop/root/hadoop_privileged_nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/root/hadoop_privileged_nfs3.pid` >/dev/null 2>&1',
                               )
-    self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop nfs3'",
-        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
+    self.assertResourceCalled('Execute', "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop nfs3",
+        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec', 'HADOOP_PRIVILEGED_NFS_USER': 'hdfs'},
         not_if = None,
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid',
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
                               action = ['delete'],
                               )
     self.assertNoMoreResources()
@@ -115,7 +118,9 @@ class TestNFSGateway(RMFTestCase):
     self.assert_configure_secured()
     self.assertNoMoreResources()
 
-  def test_start_secured(self):
+  @patch("hdfs_nfsgateway.prepare_rpcbind")
+  def test_start_secured(self, prepare_rpcbind_mock):
+    prepare_rpcbind_mock.returnvalue = 0
     self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/nfsgateway.py",
                        classname = "NFSGateway",
                        command = "start",
@@ -125,25 +130,25 @@ class TestNFSGateway(RMFTestCase):
     )
     self.assert_configure_secured()
     self.assertResourceCalled('Directory', '/var/run/hadoop',
-                              owner = 'hdfs',
-                              group = 'hadoop',
+                              owner = 'root',
+                              group = 'root',
                               mode = 0755
                               )
-    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/run/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/log/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid',
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
                               action = ['delete'],
-                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid` >/dev/null 2>&1',
+                              not_if='ls /var/run/hadoop/root/hadoop_privileged_nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/root/hadoop_privileged_nfs3.pid` >/dev/null 2>&1',
                               )
-    self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start nfs3'",
-        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
-        not_if = 'ls /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid` >/dev/null 2>&1',
+    self.assertResourceCalled('Execute', "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf start nfs3",
+        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec', 'HADOOP_PRIVILEGED_NFS_USER': 'hdfs'},
+        not_if = 'ls /var/run/hadoop/root/hadoop_privileged_nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/root/hadoop_privileged_nfs3.pid` >/dev/null 2>&1',
     )
     self.assertNoMoreResources()
 
@@ -156,27 +161,27 @@ class TestNFSGateway(RMFTestCase):
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
     self.assertResourceCalled('Directory', '/var/run/hadoop',
-                              owner = 'hdfs',
-                              group = 'hadoop',
+                              owner = 'root',
+                              group = 'root',
                               mode = 0755
                               )
-    self.assertResourceCalled('Directory', '/var/run/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/run/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('Directory', '/var/log/hadoop/hdfs',
-                              owner = 'hdfs',
+    self.assertResourceCalled('Directory', '/var/log/hadoop/root',
+                              owner = 'root',
                               recursive = True,
                               )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid',
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
                               action = ['delete'],
-                              not_if='ls /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid` >/dev/null 2>&1',
+                              not_if='ls /var/run/hadoop/root/hadoop_privileged_nfs3.pid >/dev/null 2>&1 && ps -p `cat /var/run/hadoop/root/hadoop_privileged_nfs3.pid` >/dev/null 2>&1',
                               )
-    self.assertResourceCalled('Execute', "ambari-sudo.sh su hdfs -l -s /bin/bash -c '[RMF_EXPORT_PLACEHOLDER]ulimit -c unlimited ;  /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop nfs3'",
-        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
+    self.assertResourceCalled('Execute', "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E /usr/lib/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop nfs3",
+        environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec', 'HADOOP_PRIVILEGED_NFS_USER': 'hdfs'},
         not_if = None,
     )
-    self.assertResourceCalled('File', '/var/run/hadoop/hdfs/hadoop-hdfs-nfs3.pid',
+    self.assertResourceCalled('File', '/var/run/hadoop/root/hadoop_privileged_nfs3.pid',
                               action = ['delete'],
                               )
     self.assertNoMoreResources()