Browse Source

AMBARI-10998. Stack advisor: hive.tez.container.size and tez.task.resource.memory.mb should have same value/calculation logic (mpapirkovskyy via srimanth)

Srimanth Gunturi cách đây 10 năm
mục cha
commit
6016fc6e8f

+ 4 - 0
ambari-server/src/main/resources/stacks/HDP/2.1/services/HIVE/configuration/hive-site.xml

@@ -374,6 +374,10 @@ limitations under the License.
         <type>yarn-site</type>
         <name>yarn.scheduler.minimum-allocation-mb</name>
       </property>
+      <property>
+        <type>yarn-site</type>
+        <name>yarn.scheduler.maximum-allocation-mb</name>
+      </property>
     </depends-on>
   </property>
 

+ 10 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml

@@ -90,6 +90,16 @@
     <description>The amount of memory to be used by launched tasks.
       Used only if the value is not specified explicitly by the DAG definition.
     </description>
+    <depends-on>
+      <property>
+        <type>yarn-site</type>
+        <name>yarn.scheduler.minimum-allocation-mb</name>
+      </property>
+      <property>
+        <type>yarn-site</type>
+        <name>yarn.scheduler.maximum-allocation-mb</name>
+      </property>
+    </depends-on>
   </property>
 
   <property>

+ 11 - 4
ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py

@@ -302,10 +302,12 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
 
     if not "yarn-site" in configurations:
       self.recommendYARNConfigurations(configurations, clusterData, services, hosts)
+    #properties below should be always present as they are provided in HDP206 stack advisor at least
+    yarnMaxAllocationSize = min(30 * int(configurations["yarn-site"]["properties"]["yarn.scheduler.minimum-allocation-mb"]), int(configurations["yarn-site"]["properties"]["yarn.scheduler.maximum-allocation-mb"]))
+    #duplicate tez task resource calc logic, direct dependency doesn't look good here (in case of Hive without Tez)
+    container_size = clusterData['mapMemory'] if clusterData['mapMemory'] > 2048 else int(clusterData['reduceMemory'])
+    container_size = min(clusterData['containers'] * clusterData['ramPerContainer'], container_size, yarnMaxAllocationSize)
 
-    if "yarn-site" in configurations and \
-      "yarn.scheduler.minimum-allocation-mb" in configurations["yarn-site"]["properties"]:
-      container_size = configurations["yarn-site"]["properties"]["yarn.scheduler.minimum-allocation-mb"]
     putHiveSiteProperty("hive.tez.container.size", container_size)
     putHiveSiteProperty("hive.prewarm.enabled", "false")
     putHiveSiteProperty("hive.prewarm.numcontainers", "3")
@@ -528,11 +530,16 @@ class HDP22StackAdvisor(HDP21StackAdvisor):
 
 
   def recommendTezConfigurations(self, configurations, clusterData, services, hosts):
+    if not "yarn-site" in configurations:
+      self.recommendYARNConfigurations(configurations, clusterData, services, hosts)
+    #properties below should be always present as they are provided in HDP206 stack advisor
+    yarnMaxAllocationSize = min(30 * int(configurations["yarn-site"]["properties"]["yarn.scheduler.minimum-allocation-mb"]), int(configurations["yarn-site"]["properties"]["yarn.scheduler.maximum-allocation-mb"]))
+
     putTezProperty = self.putProperty(configurations, "tez-site")
     putTezProperty("tez.am.resource.memory.mb", int(clusterData['amMemory']) * 2 if int(clusterData['amMemory']) < 3072 else int(clusterData['amMemory']))
 
     taskResourceMemory = clusterData['mapMemory'] if clusterData['mapMemory'] > 2048 else int(clusterData['reduceMemory'])
-    taskResourceMemory = min(clusterData['containers'] * clusterData['ramPerContainer'], taskResourceMemory)
+    taskResourceMemory = min(clusterData['containers'] * clusterData['ramPerContainer'], taskResourceMemory, yarnMaxAllocationSize)
     putTezProperty("tez.task.resource.memory.mb", taskResourceMemory)
     putTezProperty("tez.runtime.io.sort.mb", min(int(taskResourceMemory * 0.4), 2047))
     putTezProperty("tez.runtime.unordered.output.buffer.size-mb", int(taskResourceMemory * 0.075))

+ 48 - 7
ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py

@@ -68,7 +68,14 @@ class TestHDP22StackAdvisor(TestCase):
     return self.get_system_min_uid_real()
 
   def test_recommendTezConfigurations(self):
-    configurations = {}
+    configurations = {
+        "yarn-site": {
+            "properties": {
+                "yarn.scheduler.minimum-allocation-mb": "256",
+                "yarn.scheduler.maximum-allocation-mb": "2048",
+                },
+            }
+    }
     clusterData = {
       "mapMemory": 3000,
       "amMemory": 2000,
@@ -84,13 +91,26 @@ class TestHDP22StackAdvisor(TestCase):
           "tez.runtime.io.sort.mb": "307",
           "tez.runtime.unordered.output.buffer.size-mb": "57"
         }
+      },
+      'yarn-site': {
+        'properties': {
+          'yarn.scheduler.minimum-allocation-mb': '256',
+          'yarn.scheduler.maximum-allocation-mb': '2048'
+        }
       }
     }
     self.stackAdvisor.recommendTezConfigurations(configurations, clusterData, None, None)
     self.assertEquals(configurations, expected)
 
   def test_recommendTezConfigurations_amMemoryMoreThan3072(self):
-    configurations = {}
+    configurations = {
+      "yarn-site": {
+        "properties": {
+          "yarn.scheduler.minimum-allocation-mb": "256",
+          "yarn.scheduler.maximum-allocation-mb": "2048",
+          },
+        }
+    }
     clusterData = {
       "mapMemory": 4000,
       "amMemory": 3100,
@@ -106,13 +126,26 @@ class TestHDP22StackAdvisor(TestCase):
           "tez.runtime.io.sort.mb": "307",
           "tez.runtime.unordered.output.buffer.size-mb": "57"
         }
+      },
+      'yarn-site': {
+        'properties': {
+          'yarn.scheduler.minimum-allocation-mb': '256',
+          'yarn.scheduler.maximum-allocation-mb': '2048'
+        }
       }
     }
     self.stackAdvisor.recommendTezConfigurations(configurations, clusterData, None, None)
     self.assertEquals(configurations, expected)
 
   def test_recommendTezConfigurations_mapMemoryLessThan768(self):
-    configurations = {}
+    configurations = {
+      "yarn-site": {
+        "properties": {
+          "yarn.scheduler.minimum-allocation-mb": "256",
+          "yarn.scheduler.maximum-allocation-mb": "2048",
+          },
+        }
+    }
     clusterData = {
       "mapMemory": 760,
       "amMemory": 2000,
@@ -128,6 +161,12 @@ class TestHDP22StackAdvisor(TestCase):
           "tez.runtime.io.sort.mb": "304",
           "tez.runtime.unordered.output.buffer.size-mb": "57"
         }
+      },
+      'yarn-site': {
+        'properties': {
+          'yarn.scheduler.minimum-allocation-mb': '256',
+          'yarn.scheduler.maximum-allocation-mb': '2048'
+        }
       }
     }
     self.stackAdvisor.recommendTezConfigurations(configurations, clusterData, None, None)
@@ -872,6 +911,7 @@ class TestHDP22StackAdvisor(TestCase):
       "yarn-site": {
         "properties": {
           "yarn.scheduler.minimum-allocation-mb": "256",
+          "yarn.scheduler.maximum-allocation-mb": "8192",
         },
       },
       "capacity-scheduler": {
@@ -896,7 +936,8 @@ class TestHDP22StackAdvisor(TestCase):
       },
       'yarn-site': {
         'properties': {
-          'yarn.scheduler.minimum-allocation-mb': '256'
+          'yarn.scheduler.minimum-allocation-mb': '256',
+          'yarn.scheduler.maximum-allocation-mb': '8192'
         }
       },
       'hive-env': {
@@ -910,7 +951,7 @@ class TestHDP22StackAdvisor(TestCase):
       },
       'hive-site': {
         'properties': {
-          'hive.auto.convert.join.noconditionaltask.size': '89478485',
+          'hive.auto.convert.join.noconditionaltask.size': '268435456',
           'hive.cbo.enable': 'true',
           'hive.compactor.initiator.on': 'false',
           'hive.compactor.worker.threads': '0',
@@ -936,7 +977,7 @@ class TestHDP22StackAdvisor(TestCase):
           'hive.stats.fetch.partition.stats': 'true',
           'hive.support.concurrency': 'false',
           'hive.tez.auto.reducer.parallelism': 'true',
-          'hive.tez.container.size': '256',
+          'hive.tez.container.size': '768',
           'hive.tez.dynamic.partition.pruning': 'true',
           'hive.tez.java.opts': '-server -Xmx615m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps',
           'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager',
@@ -944,7 +985,7 @@ class TestHDP22StackAdvisor(TestCase):
           'hive.vectorized.execution.reduce.enabled': 'false'
         },
        'property_attributes': {
-         'hive.auto.convert.join.noconditionaltask.size': {'maximum': '268435456'},
+         'hive.auto.convert.join.noconditionaltask.size': {'maximum': '805306368'},
          'hive.server2.authentication.pam.services': {'delete': 'true'}, 
          'hive.server2.custom.authentication.class': {'delete': 'true'}, 
          'hive.server2.authentication.ldap.baseDN': {'delete': 'true'},