فهرست منبع

AMBARI-4858. Add configurations to support Hive running on Tez or Mapreduce (ncole)

Nate Cole 11 سال پیش
والد
کامیت
0ee9b81f9f

+ 104 - 2
ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/hive-site.xml

@@ -164,6 +164,12 @@ limitations under the License.
     <description>Whether sorting is enforced. If true, while inserting into the table, sorting is enforced.</description>
   </property>
 
+  <property>
+    <name>hive.enforce.sortmergebucketmapjoin</name>
+    <value>true</value>
+    <description>If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not</description>
+  </property>
+
   <property>
     <name>hive.map.aggr</name>
     <value>true</value>
@@ -234,7 +240,7 @@ limitations under the License.
 
   <property>
     <name>hive.optimize.reducededuplication.min.reducer</name>
-    <value>1</value>
+    <value>4</value>
     <description>Reduce deduplication merges two RSs by moving key/parts/reducer-num of the child RS to parent RS.
       That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.
       The optimization will be disabled if number of reducers is less than specified value.
@@ -262,7 +268,7 @@ limitations under the License.
 
   <property>
     <name>hive.vectorized.execution.enabled</name>
-    <value>false</value>
+    <value>true</value>
     <description>This flag controls the vectorized mode of query execution as documented in HIVE-4160 (as of Hive 0.13.0)
     </description>
   </property>
@@ -300,4 +306,100 @@ limitations under the License.
     <description>Pre Execute Hook for Tests</description>
   </property>
 
+  <property>
+    <name>hive.vectorized.groupby.maxentries</name>
+    <value>1024</value>
+    <description>Max number of entries in the vector group by aggregation hashtables.
+      Exceeding this will trigger a flush irrespective of the memory pressure condition.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.vectorized.groupby.checkinterval</name>
+    <value>1024</value>
+    <description>Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed.</description>
+  </property>
+
+  <property>
+    <name>hive.vectorized.groupby.flush.percent</name>
+    <value>1.0</value>
+    <description>Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded.</description>
+  </property>
+
+  <property>
+    <name>hive.stats.autogather</name>
+    <value>true</value>
+    <description>A flag to gather statistics automatically during the INSERT OVERWRITE command.</description>
+  </property>
+
+  <property>
+    <name>hive.tez.container.size</name>
+    <value>3000000000</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.tez.input.format</name>
+    <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.tez.java.opts</name>
+    <value>-server -Xmx1024m -Djava.net.preferIPv4Stack=true</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.compute.query.using.stats</name>
+    <value>true</value>
+    <description>
+      When set to true, Hive will answer a few queries like count(1) purely using stats
+      stored in the metastore. For basic stats collection, set the config hive.stats.autogather to true.
+      For more advanced stats collection, you need to run analyze table queries.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.orc.splits.include.file.footer</name>
+    <value>false</value>
+    <description>
+      If turned on splits generated by orc will include metadata about the stripes in the file. This
+      data is read remotely (from the client or HS2 machine) and sent to all the tasks.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.limit.pushdown.memory.usage</name>
+    <value>0.04</value>
+    <description>The max memory to be used for hash in RS operator for top K selection.</description>
+  </property>
+
+  <property>
+    <name>hive.jar.directory</name>
+    <value>hdfs:///apps/hive/install</value>
+    <description>
+      This is the location where Hive in Tez mode will look for a site-wide
+      installed Hive instance.
+    </description>
+  </property>
+
+  <property>
+    <name>hive.server2.tez.default.queues</name>
+    <value></value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.server2.tez.sessions.per.default.queue</name>
+    <value>1</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>hive.server2.tez.initialize.default.sessions</name>
+    <value>false</value>
+    <description></description>
+  </property>
+
 </configuration>

+ 79 - 0
ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/configuration/mapred-site.xml

@@ -0,0 +1,79 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+  <property>
+    <name>mapred.reduce.parallel.copies</name>
+    <value>30</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapred.job.shuffle.input.buffer.percent</name>
+    <value>0.6</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapred.job.reduce.input.buffer.percent</name>
+    <value>0.2</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapred.map.child.java.opts</name>
+    <value>-server -Xmx1536m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC</value>
+    <description></description>
+  </property>
+    
+
+  <property>
+    <name>mapred.reduce.child.java.opts</name>
+    <value>-server -Xmx1536m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.map.memory.mb</name>
+    <value>2048</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.reduce.memory.mb</name>
+    <value>2048</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.map.output.compress</name>
+    <value>false</value>
+    <description></description>
+  </property>
+
+  <property>
+    <name>mapreduce.map.output.compress.codec</name>
+    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
+    <description></description>
+  </property>
+
+</configuration>

+ 1 - 0
ambari-server/src/main/resources/stacks/HDP/2.1.1/services/HIVE/metainfo.xml

@@ -75,6 +75,7 @@
         <config-type>hive-log4j</config-type>
         <config-type>hive-exec-log4j</config-type>
         <config-type>global</config-type>
+        <config-type>mapred-site</config-type>
       </configuration-dependencies>
     </service>