9 年之前 · ec942a1ac1
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
@@ -20,6 +20,7 @@ limitations under the License.
 
				 
			
 
				 import os
			
 
				 import logging
			
 
				+import urlparse
			
 
				 
			
 
				 from resource_management.libraries.functions import file_system
			
 
				 from resource_management.libraries.functions import mounted_dirs_helper
			
@@ -52,6 +53,11 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				   configurations (dictionary): a mapping of configuration key to value
			
 
				   parameters (dictionary): a mapping of script parameter key to value
			
 
				   host_name (string): the name of this host where the alert is running
			
 
				+
			
 
				+  DataNode directories can be in any of the following formats, and each one must be supported:
			
 
				+    /grid/dn/archive0
			
 
				+    [SSD]/grid/dn/archive0
			
 
				+    [ARCHIVE]file:///grid/dn/archive0
			
 
				   """
			
 
				   warnings = []
			
 
				   errors = []
			
@@ -68,33 +74,40 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				   if dfs_data_dir is None:
			
 
				     return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DFS_DATA_DIR)])
			
 
				 
			
 
				-  data_dir_mount_file_exists = True
			
 
				   # This follows symlinks and will return False for a broken link (even in the middle of the linked list)
			
 
				+  data_dir_mount_file_exists = True
			
 
				   if not os.path.exists(DATA_DIR_MOUNT_FILE):
			
 
				     data_dir_mount_file_exists = False
			
 
				-    warnings.append("File not found, {0} .".format(DATA_DIR_MOUNT_FILE))
			
 
				+    warnings.append("{0} was not found.".format(DATA_DIR_MOUNT_FILE))
			
 
				 
			
 
				-  valid_data_dirs = set()            # data dirs that have been normalized
			
 
				+  normalized_data_dirs = set()            # data dirs that have been normalized
			
 
				   data_dirs_not_exist = set()        # data dirs that do not exist
			
 
				   data_dirs_unknown = set()          # data dirs for which could not determine mount
			
 
				   data_dirs_on_root = set()          # set of data dirs that are on root mount
			
 
				   data_dirs_on_mount = set()         # set of data dirs that are mounted on a device
			
 
				   data_dirs_unmounted = []           # list of data dirs that are known to have become unmounted
			
 
				 
			
 
				+  # normalize each data directory to a plain path by stripping any storage tag and URI scheme
			
 
				   for data_dir in dfs_data_dir.split(","):
			
 
				     if data_dir is None or data_dir.strip() == "":
			
 
				       continue
			
 
				+
			
 
				     data_dir = data_dir.strip()
			
 
				+
			
 
				     # filter out data storage tags
			
 
				     for tag in DATA_STORAGE_TAGS:
			
 
				       if data_dir.startswith(tag):
			
 
				         data_dir = data_dir.replace(tag, "")
			
 
				         continue
			
 
				-    valid_data_dirs.add(data_dir)
			
 
				+
			
 
				+    # keep only the path component in case the entry is a URI (e.g. file:///grid/dn/archive0)
			
 
				+    data_dir = urlparse.urlparse(data_dir).path
			
 
				+
			
 
				+    normalized_data_dirs.add(data_dir)
			
 
				 
			
 
				   # Sort the data dirs, which is needed for deterministic behavior when running the unit tests.
			
 
				-  valid_data_dirs = sorted(valid_data_dirs)
			
 
				-  for data_dir in valid_data_dirs:
			
 
				+  normalized_data_dirs = sorted(normalized_data_dirs)
			
 
				+  for data_dir in normalized_data_dirs:
			
 
				     # This follows symlinks and will return False for a broken link (even in the middle of the linked list)
			
 
				     if os.path.isdir(data_dir):
			
 
				       curr_mount_point = file_system.get_mount_point_for_dir(data_dir)
			
@@ -111,16 +124,16 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				       data_dirs_not_exist.add(data_dir)
			
 
				 
			
 
				   # To keep the messages consistent for all hosts, sort the sets into lists
			
 
				-  valid_data_dirs = sorted(valid_data_dirs)
			
 
				+  normalized_data_dirs = sorted(normalized_data_dirs)
			
 
				   data_dirs_not_exist = sorted(data_dirs_not_exist)
			
 
				   data_dirs_unknown = sorted(data_dirs_unknown)
			
 
				   data_dirs_on_root = sorted(data_dirs_on_root)
			
 
				 
			
 
				   if data_dirs_not_exist:
			
 
				-    errors.append("Data dir(s) not found: {0} .".format(", ".join(data_dirs_not_exist)))
			
 
				+    errors.append("The following data dir(s) were not found: {0}\n".format("\n".join(data_dirs_not_exist)))
			
 
				 
			
 
				   if data_dirs_unknown:
			
 
				-    errors.append("Cannot find mount point for data dir(s): {0} .".format(", ".join(data_dirs_unknown)))
			
 
				+    errors.append("Cannot find the mount point for the following data dir(s):\n{0}".format("\n".join(data_dirs_unknown)))
			
 
				 
			
 
				   if data_dir_mount_file_exists:
			
 
				     # This dictionary contains the expected values of <data_dir, mount_point>
			
@@ -135,13 +148,13 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				         data_dirs_unmounted.append(data_dir)
			
 
				 
			
 
				     if len(data_dirs_unmounted) > 0:
			
 
				-      errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition: {0} .".format(", ".join(data_dirs_unmounted)))
			
 
				+      errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition:\n{0}".format("\n".join(data_dirs_unmounted)))
			
 
				   else:
			
 
				     # Couldn't make guarantees about the expected value of mount points, so rely on this strategy that is likely to work.
			
 
				     # It will report false positives (aka false alarms) if the user actually intended to have
			
 
				     # 1+ data dirs on a mount and 1+ data dirs on the root partition.
			
 
				     if len(data_dirs_on_mount) >= 1 and len(data_dirs_on_root) >= 1:
			
 
				-      errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition: {0} .".format(", ".join(data_dirs_on_root)))
			
 
				+      errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition:\n{0}".format("\n".join(data_dirs_on_root)))
			
 
				 
			
 
				   # Determine the status based on warnings and errors.
			
 
				   if len(errors) == 0:
			
@@ -153,10 +166,10 @@ def execute(configurations={}, parameters={}, host_name=None):
 
				       status = RESULT_STATE_WARNING
			
 
				       messages += warnings
			
 
				 
			
 
				-    if len(valid_data_dirs) > 0:
			
 
				-      messages.append("Data dir(s) are fine, {0} .".format(", ".join(valid_data_dirs)))
			
 
				+    if len(normalized_data_dirs) > 0:
			
 
				+      messages.append("The following data dir(s) are valid:\n{0}".format("\n".join(normalized_data_dirs)))
			
 
				     else:
			
 
				-      messages.append("No data dirs to analyze.")
			
 
				+      messages.append("There are no data directories to analyze.")
			
 
				 
			
 
				     return (status, ["\n".join(messages)])
			
 
				   else:
			
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
@@ -91,7 +91,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
 
				     [status, messages] = alert.execute(configurations=configs)
			
 
				     self.assertEqual(status, RESULT_STATE_WARNING)
			
 
				     self.assertTrue(messages is not None and len(messages) == 1)
			
 
				-    self.assertTrue("File not found, {0}".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
			
 
				+    self.assertTrue("{0} was not found".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
			
 
				 
			
 
				   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
			
 
				   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
			
@@ -117,7 +117,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
 
				     [status, messages] = alert.execute(configurations=configs)
			
 
				     self.assertEqual(status, RESULT_STATE_OK)
			
 
				     self.assertTrue(messages is not None and len(messages) == 1)
			
 
				-    self.assertTrue("Data dir(s) are fine" in messages[0])
			
 
				+    self.assertTrue("The following data dir(s) are valid" in messages[0])
			
 
				 
			
 
				   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
			
 
				   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
			
@@ -142,7 +142,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
 
				     [status, messages] = alert.execute(configurations=configs)
			
 
				     self.assertEqual(status, RESULT_STATE_OK)
			
 
				     self.assertTrue(messages is not None and len(messages) == 1)
			
 
				-    self.assertTrue("Data dir(s) are fine" in messages[0])
			
 
				+    self.assertTrue("The following data dir(s) are valid" in messages[0])
			
 
				 
			
 
				   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
			
 
				   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
			
@@ -166,7 +166,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
 
				     [status, messages] = alert.execute(configurations=configs)
			
 
				     self.assertEqual(status, RESULT_STATE_CRITICAL)
			
 
				     self.assertTrue(messages is not None and len(messages) == 1)
			
 
				-    self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition: /grid/0/data, /grid/1/data" in messages[0])
			
 
				+    self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition:\n/grid/0/data\n/grid/1/data" in messages[0])
			
 
				 
			
 
				   @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
			
 
				   @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
			
@@ -193,4 +193,28 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
 
				     [status, messages] = alert.execute(configurations=configs)
			
 
				     self.assertEqual(status, RESULT_STATE_CRITICAL)
			
 
				     self.assertTrue(messages is not None and len(messages) == 1)
			
 
				-    self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition: /grid/1/data ." in messages[0])
			
 
				+    self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition:\n/grid/1/data" in messages[0])
			
 
				+
			
 
				+
			
 
				+  @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
			
 
				+  @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
			
 
				+  @patch("os.path.exists")
			
 
				+  @patch("os.path.isdir")
			
 
				+  def test_file_uri_and_meta_tags(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
			
 
				+    """
			
 
				+    Test that the status is OK when a data dir location carries both a storage tag (e.g. [SSD]) and a file:// URI scheme.
			
 
				+    """
			
 
				+    configs = {
			
 
				+      "{{hdfs-site/dfs.datanode.data.dir}}":"[SSD]file:///grid/0/data"
			
 
				+    }
			
 
				+
			
 
				+    # Mock calls
			
 
				+    exists_mock.return_value = True
			
 
				+    is_dir_mock.return_value = True
			
 
				+    get_mount_mock.return_value = "/"
			
 
				+    get_data_dir_to_mount_from_file_mock.return_value = {"/grid/0/data":"/"}
			
 
				+
			
 
				+    [status, messages] = alert.execute(configurations = configs)
			
 
				+    self.assertEqual(status, RESULT_STATE_OK)
			
 
				+    self.assertTrue(messages is not None and len(messages) == 1)
			
 
				+    self.assertEqual("The following data dir(s) are valid:\n/grid/0/data", messages[0])