Browse Source

AMBARI-12374. Unable to Start NameNode in HA Mode On HDP 2.0 (dlysnichenko)

Lisnichenko Dmitro 10 years ago
parent
commit
b0a28fecee

+ 2 - 2
ambari-common/src/main/python/resource_management/libraries/functions/namenode_ha_utils.py

@@ -29,7 +29,7 @@ HDFS_NN_STATE_STANDBY = 'standby'
 
 
 NAMENODE_HTTP_FRAGMENT = 'dfs.namenode.http-address.{0}.{1}'
 NAMENODE_HTTP_FRAGMENT = 'dfs.namenode.http-address.{0}.{1}'
 NAMENODE_HTTPS_FRAGMENT = 'dfs.namenode.https-address.{0}.{1}'
 NAMENODE_HTTPS_FRAGMENT = 'dfs.namenode.https-address.{0}.{1}'
-JMX_URI_FRAGMENT = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+JMX_URI_FRAGMENT = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem"
   
   
 def get_namenode_states(hdfs_site, security_enabled, run_user):
 def get_namenode_states(hdfs_site, security_enabled, run_user):
   """
   """
@@ -62,7 +62,7 @@ def get_namenode_states(hdfs_site, security_enabled, run_user):
 
 
       jmx_uri = JMX_URI_FRAGMENT.format(protocol, value)
       jmx_uri = JMX_URI_FRAGMENT.format(protocol, value)
       
       
-      state = get_value_from_jmx(jmx_uri, 'State', security_enabled, run_user, is_https_enabled)
+      state = get_value_from_jmx(jmx_uri, 'tag.HAState', security_enabled, run_user, is_https_enabled)
       
       
       if state == HDFS_NN_STATE_ACTIVE:
       if state == HDFS_NN_STATE_ACTIVE:
         active_namenodes.append((nn_unique_id, value))
         active_namenodes.append((nn_unique_id, value))

+ 6 - 4
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/journalnode_upgrade.py

@@ -23,7 +23,9 @@ from resource_management.core.logger import Logger
 from resource_management.core.resources.system import Execute
 from resource_management.core.resources.system import Execute
 from resource_management.libraries.functions.default import default
 from resource_management.libraries.functions.default import default
 from resource_management.core.exceptions import Fail
 from resource_management.core.exceptions import Fail
-from utils import get_jmx_data
+import utils
+from resource_management.libraries.functions.jmx import get_value_from_jmx
+import namenode_ha_state
 from namenode_ha_state import NAMENODE_STATE, NamenodeHAState
 from namenode_ha_state import NAMENODE_STATE, NamenodeHAState
 
 
 
 
@@ -48,14 +50,14 @@ def post_upgrade_check():
     raise Fail("Need at least 3 Journalnodes to maintain a quorum")
     raise Fail("Need at least 3 Journalnodes to maintain a quorum")
 
 
   try:
   try:
-    namenode_ha = NamenodeHAState()
+    namenode_ha = namenode_ha_state.NamenodeHAState()
   except ValueError, err:
   except ValueError, err:
     raise Fail("Could not retrieve Namenode HA addresses. Error: " + str(err))
     raise Fail("Could not retrieve Namenode HA addresses. Error: " + str(err))
 
 
   Logger.info(str(namenode_ha))
   Logger.info(str(namenode_ha))
   nn_address = namenode_ha.get_address(NAMENODE_STATE.ACTIVE)
   nn_address = namenode_ha.get_address(NAMENODE_STATE.ACTIVE)
 
 
-  nn_data = get_jmx_data(nn_address, 'org.apache.hadoop.hdfs.server.namenode.FSNamesystem', 'JournalTransactionInfo',
+  nn_data = utils.get_jmx_data(nn_address, 'org.apache.hadoop.hdfs.server.namenode.FSNamesystem', 'JournalTransactionInfo',
                          namenode_ha.is_encrypted(), params.security_enabled)
                          namenode_ha.is_encrypted(), params.security_enabled)
   if not nn_data:
   if not nn_data:
     raise Fail("Could not retrieve JournalTransactionInfo from JMX")
     raise Fail("Could not retrieve JournalTransactionInfo from JMX")
@@ -121,7 +123,7 @@ def ensure_jns_have_new_txn(nodes, last_txn_id):
         continue
         continue
 
 
       url = '%s://%s:%s' % (protocol, node, params.journalnode_port)
       url = '%s://%s:%s' % (protocol, node, params.journalnode_port)
-      data = get_jmx_data(url, 'Journal-', 'LastWrittenTxId', params.https_only, params.security_enabled)
+      data = utils.get_jmx_data(url, 'Journal-', 'LastWrittenTxId', params.https_only, params.security_enabled)
       if data:
       if data:
         actual_txn_ids[node] = int(data)
         actual_txn_ids[node] = int(data)
         if actual_txn_ids[node] >= last_txn_id:
         if actual_txn_ids[node] >= last_txn_id:

+ 2 - 2
ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_ha_state.py

@@ -57,7 +57,7 @@ class NamenodeHAState:
     policy = default("/configurations/hdfs-site/dfs.http.policy", "HTTP_ONLY")
     policy = default("/configurations/hdfs-site/dfs.http.policy", "HTTP_ONLY")
     self.encrypted = policy.upper() == "HTTPS_ONLY"
     self.encrypted = policy.upper() == "HTTPS_ONLY"
 
 
-    jmx_uri_fragment = ("https" if self.encrypted else "http") + "://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+    jmx_uri_fragment = ("https" if self.encrypted else "http") + "://{0}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem"
     namenode_http_fragment = "dfs.namenode.http-address.{0}.{1}"
     namenode_http_fragment = "dfs.namenode.http-address.{0}.{1}"
     namenode_https_fragment = "dfs.namenode.https-address.{0}.{1}"
     namenode_https_fragment = "dfs.namenode.https-address.{0}.{1}"
 
 
@@ -81,7 +81,7 @@ class NamenodeHAState:
           raise Exception("Could not retrieve hostname from address " + actual_value)
           raise Exception("Could not retrieve hostname from address " + actual_value)
 
 
         jmx_uri = jmx_uri_fragment.format(actual_value)
         jmx_uri = jmx_uri_fragment.format(actual_value)
-        state = get_value_from_jmx(jmx_uri, "State", params.security_enabled, params.hdfs_user, params.is_https_enabled)
+        state = get_value_from_jmx(jmx_uri, "tag.HAState", params.security_enabled, params.hdfs_user, params.is_https_enabled)
 
 
         if not state:
         if not state:
           raise Exception("Could not retrieve Namenode state from URL " + jmx_uri)
           raise Exception("Could not retrieve Namenode state from URL " + jmx_uri)

+ 29 - 20
ambari-server/src/test/python/stacks/2.0.6/HDFS/test_journalnode.py

@@ -260,9 +260,7 @@ class TestJournalnode(RMFTestCase):
 
 
 
 
   @patch('time.sleep')
   @patch('time.sleep')
-  @patch("urllib2.urlopen")
-  @patch("utils.curl_krb_request")
-  def test_post_rolling_restart(self, curl_krb_request_mock, urlopen_mock, time_mock):
+  def test_post_rolling_restart(self, time_mock):
     # load the NN and JN JMX files so that the urllib2.urlopen mock has data
     # load the NN and JN JMX files so that the urllib2.urlopen mock has data
     # to return
     # to return
     num_journalnodes = 3
     num_journalnodes = 3
@@ -283,33 +281,44 @@ class TestJournalnode(RMFTestCase):
     namenode_status_active = open(namenode_status_active_file, 'r').read()
     namenode_status_active = open(namenode_status_active_file, 'r').read()
     namenode_status_standby = open(namenode_status_standby_file, 'r').read()
     namenode_status_standby = open(namenode_status_standby_file, 'r').read()
 
 
+    import utils
+    import urllib2
+    from namenode_ha_state import NamenodeHAState
+
     url_stream_mock = MagicMock()
     url_stream_mock = MagicMock()
     url_stream_mock.read.side_effect = (num_journalnodes * [namenode_jmx, journalnode_jmx])
     url_stream_mock.read.side_effect = (num_journalnodes * [namenode_jmx, journalnode_jmx])
-
-    urlopen_mock.return_value = url_stream_mock
-
-    # run the post_rolling_restart using the data from above
-    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/journalnode.py",
-      classname = "JournalNode", command = "post_rolling_restart",
-      config_file = "journalnode-upgrade.json",
-      checked_call_mocks = [(0, str(namenode_status_active)), (0, str(namenode_status_standby))],
-      hdp_stack_version = self.UPGRADE_STACK_VERSION,
-      target = RMFTestCase.TARGET_COMMON_SERVICES )
+    urlopen_mock = MagicMock(return_value = url_stream_mock)
+    #urlopen_mock.return_value = url_stream_mock
+    curl_krb_request_mock = MagicMock(side_effect=(num_journalnodes * [(namenode_jmx, "", 1), (journalnode_jmx, "", 1)]))
+    get_address_mock = MagicMock(return_value="c6406.ambari.apache.org")
+    with patch.object(utils, "curl_krb_request", curl_krb_request_mock):
+      with patch.object(urllib2, "urlopen", urlopen_mock):
+       with patch.object(NamenodeHAState, "get_address", get_address_mock):
+         self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/journalnode.py",
+           classname = "JournalNode", command = "post_rolling_restart",
+           config_file = "journalnode-upgrade.json",
+           checked_call_mocks = [(0, str(namenode_status_active)), (0, str(namenode_status_standby))],
+           hdp_stack_version = self.UPGRADE_STACK_VERSION,
+           target = RMFTestCase.TARGET_COMMON_SERVICES )
 
 
     # ensure that the mock was called with the http-style version of the URL
     # ensure that the mock was called with the http-style version of the URL
     urlopen_mock.assert_called
     urlopen_mock.assert_called
     urlopen_mock.assert_called_with("http://c6407.ambari.apache.org:8480/jmx")
     urlopen_mock.assert_called_with("http://c6407.ambari.apache.org:8480/jmx")
 
 
     url_stream_mock.reset_mock()
     url_stream_mock.reset_mock()
-    curl_krb_request_mock.side_effect = (num_journalnodes * [(namenode_jmx, "", 1), (journalnode_jmx, "", 1)])
+    curl_krb_request_mock.reset_mock()
+    get_address_mock.reset_mock()
 
 
     # now try with HDFS on SSL
     # now try with HDFS on SSL
-    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/journalnode.py",
-      classname = "JournalNode", command = "post_rolling_restart",
-      config_file = "journalnode-upgrade-hdfs-secure.json",
-      checked_call_mocks = [(0, str(namenode_status_active)), (0, str(namenode_status_standby))],
-      hdp_stack_version = self.UPGRADE_STACK_VERSION,
-      target = RMFTestCase.TARGET_COMMON_SERVICES )
+    with patch.object(utils, "curl_krb_request", curl_krb_request_mock):
+      with patch.object(urllib2, "urlopen", urlopen_mock):
+        with patch.object(NamenodeHAState, "get_address", get_address_mock):
+         self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/journalnode.py",
+           classname = "JournalNode", command = "post_rolling_restart",
+           config_file = "journalnode-upgrade-hdfs-secure.json",
+           checked_call_mocks = [(0, str(namenode_status_active)), (0, str(namenode_status_standby))],
+           hdp_stack_version = self.UPGRADE_STACK_VERSION,
+           target = RMFTestCase.TARGET_COMMON_SERVICES )
 
 
     # ensure that the mock was called with the http-style version of the URL
     # ensure that the mock was called with the http-style version of the URL
     curl_krb_request_mock.assert_called
     curl_krb_request_mock.assert_called