Explorar o código

AMBARI-13161 Master heap setting in distributed mode should be limited (dsen)

Dmytro Sen hai 9 anos
pai
achega
d2ae799f72

+ 22 - 2
ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-env.xml

@@ -33,11 +33,21 @@
   </property>
   </property>
   <property>
   <property>
     <name>hbase_regionserver_heapsize</name>
     <name>hbase_regionserver_heapsize</name>
-    <value>1024m</value>
+    <value>512m</value>
     <description>
     <description>
         HBase RegionServer Heap Size. In embedded mode, total heap size is
         HBase RegionServer Heap Size. In embedded mode, total heap size is
         sum of master and regionserver heap sizes.
         sum of master and regionserver heap sizes.
     </description>
     </description>
+    <depends-on>
+      <property>
+        <type>ams-hbase-site</type>
+        <name>hbase.cluster.distributed</name>
+      </property>
+      <property>
+        <type>ams-hbase-site</type>
+        <name>hbase.rootdir</name>
+      </property>
+    </depends-on>
   </property>
   </property>
   <property>
   <property>
     <name>regionserver_xmn_size</name>
     <name>regionserver_xmn_size</name>
@@ -63,11 +73,21 @@
   </property>
   </property>
   <property>
   <property>
     <name>hbase_master_heapsize</name>
     <name>hbase_master_heapsize</name>
-    <value>1024m</value>
+    <value>512m</value>
     <description>
     <description>
         HBase Master Heap Size. In embedded mode, total heap size is
         HBase Master Heap Size. In embedded mode, total heap size is
         sum of master and regionserver heap sizes.
         sum of master and regionserver heap sizes.
     </description>
     </description>
+    <depends-on>
+      <property>
+        <type>ams-hbase-site</type>
+        <name>hbase.cluster.distributed</name>
+      </property>
+      <property>
+        <type>ams-hbase-site</type>
+        <name>hbase.rootdir</name>
+      </property>
+    </depends-on>
   </property>
   </property>
   <property>
   <property>
     <name>max_open_files_limit</name>
     <name>max_open_files_limit</name>

+ 20 - 0
ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-site.xml

@@ -313,4 +313,24 @@
       a normal table and would return items in rowkey order for scans
       a normal table and would return items in rowkey order for scans
     </description>
     </description>
   </property>
   </property>
+
+  <property>
+    <name>dfs.client.read.shortcircuit</name>
+    <value>true</value>
+    <description>Enable/Disable short circuit read for your client.
+      Hadoop servers should be configured to allow short circuit read
+      for the hbase user for this to take effect
+    </description>
+    <depends-on>
+      <property>
+        <type>ams-hbase-site</type>
+        <name>hbase.cluster.distributed</name>
+      </property>
+      <property>
+        <type>ams-hbase-site</type>
+        <name>hbase.rootdir</name>
+      </property>
+    </depends-on>
+  </property>
+
 </configuration>
 </configuration>

+ 5 - 0
ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py

@@ -22,6 +22,7 @@ from resource_management import *
 from ambari_commons import OSConst
 from ambari_commons import OSConst
 from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
 from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
 from hbase_service import hbase_service
 from hbase_service import hbase_service
+import os
 
 
 @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
 @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
 def ams_service(name, action):
 def ams_service(name, action):
@@ -53,6 +54,10 @@ def ams_service(name, action):
       Execute(format("{sudo} rm -rf {hbase_tmp_dir}/*.tmp")
       Execute(format("{sudo} rm -rf {hbase_tmp_dir}/*.tmp")
       )
       )
 
 
+      if not params.is_hbase_distributed and os.path.exists(format("{zookeeper_data_dir}")):
+        Execute(format("{sudo} rm -rf {zookeeper_data_dir}/*")
+        )
+
       daemon_cmd = format("{cmd} start")
       daemon_cmd = format("{cmd} start")
       Execute(daemon_cmd,
       Execute(daemon_cmd,
               user=params.ams_user
               user=params.ams_user

+ 11 - 11
ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/hbase.py

@@ -173,6 +173,17 @@ def hbase(name=None # 'master' or 'regionserver' or 'client'
     hbase_TemplateConfig( format("hbase_client_jaas.conf"), user=params.hbase_user)
     hbase_TemplateConfig( format("hbase_client_jaas.conf"), user=params.hbase_user)
     hbase_TemplateConfig( format("ams_zookeeper_jaas.conf"), user=params.hbase_user)
     hbase_TemplateConfig( format("ams_zookeeper_jaas.conf"), user=params.hbase_user)
 
 
+  if name != "client":
+    Directory( params.hbase_pid_dir,
+               owner = params.hbase_user,
+               recursive = True
+    )
+
+    Directory (params.hbase_log_dir,
+               owner = params.hbase_user,
+               recursive = True
+    )
+
   if name == "master":
   if name == "master":
 
 
     if params.is_hbase_distributed:
     if params.is_hbase_distributed:
@@ -218,17 +229,6 @@ def hbase(name=None # 'master' or 'regionserver' or 'client'
 
 
       File(format("{params.hbase_pid_dir}/distributed_mode"), action="delete", owner=params.hbase_user)
       File(format("{params.hbase_pid_dir}/distributed_mode"), action="delete", owner=params.hbase_user)
 
 
-  if name != "client":
-    Directory( params.hbase_pid_dir,
-      owner = params.hbase_user,
-      recursive = True
-    )
-
-    Directory (params.hbase_log_dir,
-      owner = params.hbase_user,
-      recursive = True
-    )
-
   if params.hbase_log4j_props is not None:
   if params.hbase_log4j_props is not None:
     File(format("{params.hbase_conf_dir}/log4j.properties"),
     File(format("{params.hbase_conf_dir}/log4j.properties"),
          mode=0644,
          mode=0644,

+ 34 - 10
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/stack_advisor.py

@@ -369,6 +369,15 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
       putHbaseEnvProperty("hbase_master_heapsize", str(hbase_heapsize) + "m")
       putHbaseEnvProperty("hbase_master_heapsize", str(hbase_heapsize) + "m")
       putHbaseEnvProperty("hbase_master_xmn_size", hbase_xmn_size)
       putHbaseEnvProperty("hbase_master_xmn_size", hbase_xmn_size)
 
 
+    # If no local DN in distributed mode
+    if rootDir.startswith("hdfs://"):
+      dn_hosts = self.getComponentHostNames(services, "HDFS", "DATANODE")
+      if set(amsCollectorHosts).intersection(dn_hosts):
+        collector_cohosted_with_dn = "true"
+      else:
+        collector_cohosted_with_dn = "false"
+      putAmsHbaseSiteProperty("dfs.client.read.shortcircuit", collector_cohosted_with_dn)
+
     #split points
     #split points
     scriptDir = os.path.dirname(os.path.abspath(__file__))
     scriptDir = os.path.dirname(os.path.abspath(__file__))
     metricsDir = os.path.join(scriptDir, '../../../../common-services/AMBARI_METRICS/0.1.0/package')
     metricsDir = os.path.join(scriptDir, '../../../../common-services/AMBARI_METRICS/0.1.0/package')
@@ -651,6 +660,7 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
           validationItems.extend([{"config-name": 'hbase.rootdir', "item": self.validatorNotRootFs(properties, recommendedDefaults, 'hbase.rootdir', host["Hosts"])}])
           validationItems.extend([{"config-name": 'hbase.rootdir', "item": self.validatorNotRootFs(properties, recommendedDefaults, 'hbase.rootdir', host["Hosts"])}])
           validationItems.extend([{"config-name": 'hbase.tmp.dir', "item": self.validatorNotRootFs(properties, recommendedDefaults, 'hbase.tmp.dir', host["Hosts"])}])
           validationItems.extend([{"config-name": 'hbase.tmp.dir', "item": self.validatorNotRootFs(properties, recommendedDefaults, 'hbase.tmp.dir', host["Hosts"])}])
 
 
+          dn_hosts = self.getComponentHostNames(services, "HDFS", "DATANODE")
           if not hbase_rootdir.startswith("hdfs"):
           if not hbase_rootdir.startswith("hdfs"):
             mountPoints = []
             mountPoints = []
             for mountPoint in host["Hosts"]["disk_info"]:
             for mountPoint in host["Hosts"]["disk_info"]:
@@ -669,7 +679,6 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
             # if METRICS_COLLECTOR is co-hosted with DATANODE
             # if METRICS_COLLECTOR is co-hosted with DATANODE
             # cross-check dfs.datanode.data.dir and hbase.rootdir
             # cross-check dfs.datanode.data.dir and hbase.rootdir
             # they shouldn't share same disk partition IO
             # they shouldn't share same disk partition IO
-            dn_hosts = self.getComponentHostNames(services, "HDFS", "DATANODE")
             hdfs_site = getSiteProperties(configurations, "hdfs-site")
             hdfs_site = getSiteProperties(configurations, "hdfs-site")
             dfs_datadirs = hdfs_site.get("dfs.datanode.data.dir").split(",") if hdfs_site and "dfs.datanode.data.dir" in hdfs_site else []
             dfs_datadirs = hdfs_site.get("dfs.datanode.data.dir").split(",") if hdfs_site and "dfs.datanode.data.dir" in hdfs_site else []
             if dn_hosts and collectorHostName in dn_hosts and ams_site and \
             if dn_hosts and collectorHostName in dn_hosts and ams_site and \
@@ -679,8 +688,18 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
                 if dfs_datadir_mountpoint == hbase_rootdir_mountpoint:
                 if dfs_datadir_mountpoint == hbase_rootdir_mountpoint:
                   item = self.getWarnItem("Consider not using {0} partition for storing metrics data. "
                   item = self.getWarnItem("Consider not using {0} partition for storing metrics data. "
                                           "{0} is already used by datanode to store HDFS data".format(hbase_rootdir_mountpoint))
                                           "{0} is already used by datanode to store HDFS data".format(hbase_rootdir_mountpoint))
-                  validationItems.extend([{"config-name":'hbase.rootdir', "item": item}])
+                  validationItems.extend([{"config-name": 'hbase.rootdir', "item": item}])
                   break
                   break
+          # If no local DN in distributed mode
+          elif collectorHostName not in dn_hosts and distributed.lower() == "true":
+            item = self.getWarnItem("It's recommended to install Datanode component on {0} "
+                                    "to speed up IO operations between HDFS and Metrics "
+                                    "Collector in distributed mode ".format(collectorHostName))
+            validationItems.extend([{"config-name": "hbase.cluster.distributed", "item": item}])
+          # Short circuit read should be enabled in distributed mode
+          # if local DN installed
+          else:
+            validationItems.extend([{"config-name": "dfs.client.read.shortcircuit", "item": self.validatorEqualsToRecommendedItem(properties, recommendedDefaults, "dfs.client.read.shortcircuit")}])
 
 
     return self.toConfigurationValidationProblems(validationItems, "ams-hbase-site")
     return self.toConfigurationValidationProblems(validationItems, "ams-hbase-site")
 
 
@@ -757,16 +776,8 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
             validationItems.extend([{"config-name": heapPropertyToIncrease, "item": unusedMemoryHbaseItem}])
             validationItems.extend([{"config-name": heapPropertyToIncrease, "item": unusedMemoryHbaseItem}])
       pass
       pass
 
 
-    # Check RS memory in distributed mode since we set default as 512m
-    hbase_site = getSiteProperties(configurations, "ams-hbase-site")
-    hbase_rootdir = hbase_site.get("hbase.rootdir")
-    regionServerMinMemItem = None
-    if hbase_rootdir and hbase_rootdir.startswith("hdfs://"):
-      regionServerMinMemItem = self.validateMinMemorySetting(properties, 1024, 'hbase_regionserver_heapsize')
-
     validationItems.extend([
     validationItems.extend([
       {"config-name": "hbase_regionserver_heapsize", "item": regionServerItem},
       {"config-name": "hbase_regionserver_heapsize", "item": regionServerItem},
-      {"config-name": "hbase_regionserver_heapsize", "item": regionServerMinMemItem},
       {"config-name": "hbase_master_heapsize", "item": masterItem},
       {"config-name": "hbase_master_heapsize", "item": masterItem},
       {"config-name": "hbase_master_heapsize", "item": masterHostItem},
       {"config-name": "hbase_master_heapsize", "item": masterHostItem},
       {"config-name": "hbase_log_dir", "item": logDirItem}
       {"config-name": "hbase_log_dir", "item": logDirItem}
@@ -890,6 +901,19 @@ class HDP206StackAdvisor(DefaultStackAdvisor):
 
 
     return None
     return None
 
 
+  def validatorEqualsToRecommendedItem(self, properties, recommendedDefaults,
+                                       propertyName):
+    if not propertyName in properties:
+      return self.getErrorItem("Value should be set for %s" % propertyName)
+    value = properties.get(propertyName)
+    if not propertyName in recommendedDefaults:
+      return self.getErrorItem("Value should be recommended for %s" % propertyName)
+    recommendedValue = recommendedDefaults.get(propertyName)
+    if value != recommendedValue:
+      return self.getWarnItem("It is recommended to set value {0} "
+             "for property {1}".format(recommendedValue, propertyName))
+    return None
+
   def validateMinMemorySetting(self, properties, defaultValue, propertyName):
   def validateMinMemorySetting(self, properties, defaultValue, propertyName):
     if not propertyName in properties:
     if not propertyName in properties:
       return self.getErrorItem("Value should be set")
       return self.getErrorItem("Value should be set")

+ 10 - 9
ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py

@@ -50,7 +50,7 @@ class TestOozieClient(RMFTestCase):
                               not_if = 'ls /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid` >/dev/null 2>&1',
                               not_if = 'ls /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid` >/dev/null 2>&1',
                               user = 'ams'
                               user = 'ams'
     )
     )
-    self.assertResourceCalled('Execute', 'ambari-sudo.sh rm -rf /var/lib/ambari-metrics-collector/hbase-tmp/*.tmp /var/lib/ambari-metrics-collector/hbase-tmp/zookeeper/*',
+    self.assertResourceCalled('Execute', 'ambari-sudo.sh rm -rf /var/lib/ambari-metrics-collector/hbase-tmp/*.tmp',
     )
     )
     self.assertResourceCalled('Execute', '/usr/sbin/ambari-metrics-collector --config /etc/ambari-metrics-collector/conf --distributed start',
     self.assertResourceCalled('Execute', '/usr/sbin/ambari-metrics-collector --config /etc/ambari-metrics-collector/conf --distributed start',
                               user = 'ams'
                               user = 'ams'
@@ -210,6 +210,15 @@ class TestOozieClient(RMFTestCase):
                               owner = 'ams',
                               owner = 'ams',
                               template_tag = None,
                               template_tag = None,
                               )
                               )
+    self.assertResourceCalled('Directory', '/var/run/ambari-metrics-collector/',
+                              owner = 'ams',
+                              recursive = True
+    )
+    self.assertResourceCalled('Directory', '/var/log/ambari-metrics-collector',
+                              owner = 'ams',
+                              recursive = True
+    )
+
     if name == 'master':
     if name == 'master':
       self.assertResourceCalled('HdfsResource', 'hdfs://localhost:8020/apps/hbase/data',
       self.assertResourceCalled('HdfsResource', 'hdfs://localhost:8020/apps/hbase/data',
                                 security_enabled = False,
                                 security_enabled = False,
@@ -257,14 +266,6 @@ class TestOozieClient(RMFTestCase):
                                 )
                                 )
       self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//distributed_mode', action=["create"],
       self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//distributed_mode', action=["create"],
                                 mode=0644, owner='ams')
                                 mode=0644, owner='ams')
-    self.assertResourceCalled('Directory', '/var/run/ambari-metrics-collector/',
-                              owner = 'ams',
-                              recursive = True
-    )
-    self.assertResourceCalled('Directory', '/var/log/ambari-metrics-collector',
-                              owner = 'ams',
-                              recursive = True
-    )
     self.assertResourceCalled('File', '/etc/ams-hbase/conf/log4j.properties',
     self.assertResourceCalled('File', '/etc/ams-hbase/conf/log4j.properties',
                               owner = 'ams',
                               owner = 'ams',
                               group = 'hadoop',
                               group = 'hadoop',

+ 17 - 0
ambari-server/src/test/python/stacks/2.0.6/common/test_stack_advisor.py

@@ -1394,3 +1394,20 @@ class TestHDP206StackAdvisor(TestCase):
     self.assertEquals(self.stack_advisor_impl.getMountPointForDir("file:///var/log", ["/var", "/"]), "/var")
     self.assertEquals(self.stack_advisor_impl.getMountPointForDir("file:///var/log", ["/var", "/"]), "/var")
     self.assertEquals(self.stack_advisor_impl.getMountPointForDir("hdfs:///hdfs_path", ["/var", "/"]), None)
     self.assertEquals(self.stack_advisor_impl.getMountPointForDir("hdfs:///hdfs_path", ["/var", "/"]), None)
     self.assertEquals(self.stack_advisor_impl.getMountPointForDir("relative/path", ["/var", "/"]), None)
     self.assertEquals(self.stack_advisor_impl.getMountPointForDir("relative/path", ["/var", "/"]), None)
+
+  def test_getValidatorEqualsToRecommendedItem(self):
+    properties = {"property1": "value1"}
+    recommendedDefaults = {"property1": "value1"}
+    self.assertEquals(self.stackAdvisor.validatorEqualsToRecommendedItem(properties, recommendedDefaults, "property1"), None)
+    properties = {"property1": "value1"}
+    recommendedDefaults = {"property1": "value2"}
+    expected = {'message': 'It is recommended to set value value2 for property property1', 'level': 'WARN'}
+    self.assertEquals(self.stackAdvisor.validatorEqualsToRecommendedItem(properties, recommendedDefaults, "property1"), expected)
+    properties = {}
+    recommendedDefaults = {"property1": "value2"}
+    expected = {'level': 'ERROR', 'message': 'Value should be set for property1'}
+    self.assertEquals(self.stackAdvisor.validatorEqualsToRecommendedItem(properties, recommendedDefaults, "property1"), expected)
+    properties = {"property1": "value1"}
+    recommendedDefaults = {}
+    expected = {'level': 'ERROR', 'message': 'Value should be recommended for property1'}
+    self.assertEquals(self.stackAdvisor.validatorEqualsToRecommendedItem(properties, recommendedDefaults, "property1"), expected)

+ 2 - 0
ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py

@@ -2161,6 +2161,8 @@ class TestHDP22StackAdvisor(TestCase):
     services["configurations"]['ams-hbase-site']['properties']['hbase.rootdir'] = 'hdfs://host1/amshbase'
     services["configurations"]['ams-hbase-site']['properties']['hbase.rootdir'] = 'hdfs://host1/amshbase'
     expected['ams-hbase-site']['properties']['hbase.rootdir'] = 'hdfs://host1/amshbase'
     expected['ams-hbase-site']['properties']['hbase.rootdir'] = 'hdfs://host1/amshbase'
     expected['ams-hbase-env']['properties']['hbase_master_heapsize'] = '512m'
     expected['ams-hbase-env']['properties']['hbase_master_heapsize'] = '512m'
+    # services["configurations"]['ams-hbase-site']['properties']['dfs.client.read.shortcircuit'] = 'true'
+    expected['ams-hbase-site']['properties']['dfs.client.read.shortcircuit'] = 'true'
 
 
     # Distributed mode, low memory, no splitpoints recommended
     # Distributed mode, low memory, no splitpoints recommended
     services["configurations"]['ams-hbase-env']['properties']['hbase_regionserver_heapsize'] = '512m'
     services["configurations"]['ams-hbase-env']['properties']['hbase_regionserver_heapsize'] = '512m'