
AMBARI-9572. Enhancements to Spark - Ambari integration (Gautam Borad via srimanth)

Srimanth Gunturi 10 years ago
Parent
Commit
da7acb02d9

+ 27 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/configuration/spark-javaopts-properties.xml

@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration supports_final="true">
+  <property>
+    <name>content</name>
+    <description>Spark-javaopts-properties</description>
+    <value> </value>
+  </property>
+</configuration>
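Note (not part of the commit): the new config type exposes a single free-form content property that operators can fill with extra JVM options; the stack scripts later read it out of the command's configurations dictionary. A minimal sketch of that lookup pattern, with a hand-built dict standing in for the command JSON Ambari normally passes to params.py:

# Illustrative only -- 'config' is a stand-in for Ambari's command JSON.
config = {
    'configurations': {
        'spark-javaopts-properties': {'content': ' '}
    }
}

# Same lookup pattern params.py uses for the other Spark config types.
spark_javaopts_properties = config['configurations']['spark-javaopts-properties']['content']
print(repr(spark_javaopts_properties))  # ' ' until an operator fills it in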

+ 60 - 3
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/metainfo.xml

@@ -30,6 +30,29 @@
          <displayName>Spark History Server</displayName>
          <category>MASTER</category>
          <cardinality>1</cardinality>
+          <dependencies>
+            <dependency>
+              <name>HDFS/HDFS_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+              </auto-deploy>
+            </dependency>
+            <dependency>
+               <name>MAPREDUCE2/MAPREDUCE2_CLIENT</name>
+               <scope>host</scope>
+               <auto-deploy>
+                 <enabled>true</enabled>
+               </auto-deploy>
+            </dependency>
+            <dependency>
+              <name>YARN/YARN_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+             </auto-deploy>
+           </dependency>
+          </dependencies>
          <commandScript>
            <script>scripts/job_history_server.py</script>
            <scriptType>PYTHON</scriptType>
@@ -41,6 +64,29 @@
          <displayName>Spark Client</displayName>
          <category>CLIENT</category>
          <cardinality>1+</cardinality>
+           <dependencies>
+            <dependency>
+              <name>HDFS/HDFS_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+              </auto-deploy>
+            </dependency>
+            <dependency>
+               <name>MAPREDUCE2/MAPREDUCE2_CLIENT</name>
+               <scope>host</scope>
+               <auto-deploy>
+                 <enabled>true</enabled>
+               </auto-deploy>
+            </dependency>
+            <dependency>
+              <name>YARN/YARN_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+             </auto-deploy>
+            </dependency>
+          </dependencies>
          <commandScript>
            <script>scripts/spark_client.py</script>
            <scriptType>PYTHON</scriptType>
@@ -51,13 +97,24 @@

      <osSpecifics>
        <osSpecific>
-          <osFamily>redhat5,redhat6,suse11,ubuntu12</osFamily>
+          <osFamily>redhat5,redhat6,suse11</osFamily>
          <packages>
            <package>
-              <name>spark</name>
+              <name>spark_2_2_*</name>
            </package>
            <package>
-              <name>spark-python</name>
+              <name>spark_2_2_*-python</name>
+            </package>
+          </packages>
+        </osSpecific>
+        <osSpecific>
+          <osFamily>ubuntu12</osFamily>
+          <packages>
+            <package>
+              <name>spark-2-2-.*</name>
+            </package>
+            <package>
+              <name>spark-2-2-.*-python</name>
            </package>
          </packages>
        </osSpecific>
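Note (not part of the commit): the new <dependencies> blocks declare host-scoped HDFS, MapReduce2, and YARN client dependencies that Ambari auto-deploys onto every Spark host, and the osSpecific split switches to the HDP 2.2 versioned package names (spark_2_2_* for the RPM families, spark-2-2-.* on Ubuntu). A small illustrative ElementTree walk over a trimmed-down snippet, not the real metainfo.xml, showing how that dependency structure reads:

# Illustrative only: list which clients a component auto-deploys.
import xml.etree.ElementTree as ET

snippet = """
<component>
  <name>SPARK_JOBHISTORYSERVER</name>
  <dependencies>
    <dependency><name>HDFS/HDFS_CLIENT</name><scope>host</scope>
      <auto-deploy><enabled>true</enabled></auto-deploy></dependency>
    <dependency><name>YARN/YARN_CLIENT</name><scope>host</scope>
      <auto-deploy><enabled>true</enabled></auto-deploy></dependency>
  </dependencies>
</component>
"""

component = ET.fromstring(snippet)
for dep in component.findall('./dependencies/dependency'):
    name = dep.findtext('name')
    enabled = dep.findtext('./auto-deploy/enabled')
    print('%s auto-deploy enabled: %s' % (name, enabled))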

+ 5 - 5
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/job_history_server.py

@@ -36,7 +36,7 @@ class JobHistoryServer(Script):
    import params

    env.set_params(params)
-    if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.1.0') >= 0:
+    if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
       Execute(format("hdp-select set spark-historyserver {version}"))
       Execute(format("hdp-select set spark-historyserver {version}"))
 
 
   def install(self, env):
   def install(self, env):
@@ -71,10 +71,10 @@ class JobHistoryServer(Script):
      Execute(spark_kinit_cmd, user=params.spark_user)

    # FIXME! TODO! remove this after soft link bug is fixed:
-    if not os.path.islink('/usr/hdp/current/spark'):
-      hdp_version = get_hdp_version()
-      cmd = 'ln -s /usr/hdp/' + hdp_version + '/spark /usr/hdp/current/spark'
-      Execute(cmd)
+    #if not os.path.islink('/usr/hdp/current/spark'):
+    #  hdp_version = get_hdp_version()
+    #  cmd = 'ln -s /usr/hdp/' + hdp_version + '/spark /usr/hdp/current/spark'
+    #  Execute(cmd)

    daemon_cmd = format('{spark_history_server_start}')
    no_op_test = format(
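Note (not part of the commit): compare_versions and format_hdp_stack_version come from Ambari's resource_management library; lowering the threshold from 2.2.1.0 to 2.2.0.0 makes hdp-select run for any HDP 2.2 stack version. The same change is applied to spark_client.py further below. A rough pure-Python stand-in for that comparison, illustrative only and not the library code:

def normalize(version):
    # '2.2.0.0-2041' -> (2, 2, 0, 0); any build suffix after '-' is ignored.
    return tuple(int(p) for p in version.split('-')[0].split('.'))

def should_run_hdp_select(version, threshold='2.2.0.0'):
    return version is not None and normalize(version) >= normalize(threshold)

print(should_run_hdp_select('2.2.0.0-2041'))            # True with the new threshold
print(normalize('2.2.0.0') >= normalize('2.2.1.0'))     # False -- the old check skipped 2.2.0.x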

+ 5 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/params.py

@@ -103,6 +103,7 @@ spark_history_ui_port = default("/configurations/spark-defaults/spark.history.ui
 spark_env_sh = config['configurations']['spark-env']['content']
 spark_log4j_properties = config['configurations']['spark-log4j-properties']['content']
 spark_metrics_properties = config['configurations']['spark-metrics-properties']['content']
+spark_javaopts_properties = config['configurations']['spark-javaopts-properties']['content']

 hive_server_host = default("/clusterHostInfo/hive_server_host", [])
 is_hive_installed = not len(hive_server_host) == 0
@@ -117,6 +118,10 @@ spark_yarn_am_extraJavaOptions = str(config['configurations']['spark-defaults'][
 if spark_yarn_am_extraJavaOptions.find('-Dhdp.version') == -1:
   spark_yarn_am_extraJavaOptions = spark_yarn_am_extraJavaOptions + ' -Dhdp.version=' + str(hdp_full_version)

+spark_javaopts_properties = str(spark_javaopts_properties)
+if spark_javaopts_properties.find('-Dhdp.version') == -1:
+  spark_javaopts_properties = spark_javaopts_properties+ ' -Dhdp.version=' + str(hdp_full_version)
+
 security_enabled = config['configurations']['cluster-env']['security_enabled']
 kinit_path_local = functions.get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
 spark_kerberos_keytab =  config['configurations']['spark-defaults']['spark.history.kerberos.keytab']
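Note (not part of the commit): both spark_yarn_am_extraJavaOptions and the new spark_javaopts_properties get '-Dhdp.version=<full version>' appended only when the operator has not already set it. A tiny hypothetical helper (not present in the commit) showing that idempotent append:

def ensure_hdp_version(java_opts, hdp_full_version):
    # Append -Dhdp.version only if the caller has not set it already.
    java_opts = str(java_opts)
    if java_opts.find('-Dhdp.version') == -1:
        java_opts = java_opts + ' -Dhdp.version=' + str(hdp_full_version)
    return java_opts

print(ensure_hdp_version(' ', '2.2.0.0-2041'))                                 # gets the flag appended
print(ensure_hdp_version('-Xmx512m -Dhdp.version=2.2.0.0-2041', '2.2.0.0-2041'))  # left unchanged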

+ 48 - 25
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/service_check.py

@@ -17,37 +17,60 @@ limitations under the License.
 """
 """
 
 
 from resource_management import *
 from resource_management import *
-
+import subprocess
+import time

 class SparkServiceCheck(Script):
   def service_check(self, env):
     import params

     env.set_params(params)
-    self.check_spark_job_history_server()
-    # self.check_spark_client()
-
-  def check_spark_job_history_server(self):
-    cmd = 'ps -ef | grep org.apache.spark.deploy.history.HistoryServer | grep -v grep'
-    code, output = shell.call(cmd, timeout=100)
-    if code == 0:
-      Logger.info('Spark job History Server up and running')
-    else:
-      Logger.debug('Spark job History Server not running')
-      raise ComponentIsNotRunning()
-
-  pass
-
-  # def check_spark_client(self):
-  # import params
-  #   smoke_cmd = params.spark_service_check_cmd
-  #   code, output = shell.call(smoke_cmd, timeout=100)
-  #   if code == 0:
-  #     Logger.info('Spark on Yarn Job can be submitted')
-  #   else:
-  #     Logger.debug('Spark on Yarn Job cannot be submitted')
-  #     raise ComponentIsNotRunning()
-  # pass
+
+    # smoke_cmd = params.spark_service_check_cmd
+    # code, output = shell.call(smoke_cmd, timeout=100)
+    # if code == 0:
+    #   Logger.info('Spark-on-Yarn Job submitted successfully')
+    # else:
+    #   Logger.info('Spark-on-Yarn Job cannot be submitted')
+    #   raise ComponentIsNotRunning()
+
+    command = "curl"
+    httpGssnegotiate = "--negotiate"
+    userpswd = "-u:"
+    insecure = "-k"
+    silent = "-s"
+    out = "-o /dev/null"
+    head = "-w'%{http_code}'"
+    url = 'http://' + params.spark_history_server_host + ':' + str(params.spark_history_ui_port)
+
+    command_with_flags = [command, silent, out, head, httpGssnegotiate, userpswd, insecure, url]
+
+    is_running = False
+    for i in range(0,10):
+      proc = subprocess.Popen(command_with_flags, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+      (stdout, stderr) = proc.communicate()
+      response = stdout
+      if '200' in response:
+        is_running = True
+        Logger.info('Spark Job History Server up and running')
+        break
+      time.sleep(5)
+
+    if is_running == False :
+      Logger.info('Spark Job History Server not running.')
+      raise ComponentIsNotRunning()  
+
+
+
+    #command_with_flags = [command, silent, out, head, httpGssnegotiate, userpswd, insecure, url]
+    # proc = subprocess.Popen(command_with_flags, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    # (stdout, stderr) = proc.communicate()
+    # response = stdout
+    # if '200' in response:
+    #   Logger.info('Spark Job History Server up and running')
+    # else:
+    #   Logger.info('Spark Job History Server not running.')
+    #   raise ComponentIsNotRunning()

 if __name__ == "__main__":
   SparkServiceCheck().execute()
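Note (not part of the commit): the rewritten check shells out to curl and retries up to 10 times, 5 seconds apart, until the History Server UI answers with HTTP 200, instead of grepping the process list. An equivalent stdlib-only sketch of the same poll loop (hostname and port below are placeholders, not values from the commit):

import time
import urllib2  # Python 2, matching the Ambari stack scripts

def wait_for_history_server(host, port, attempts=10, delay=5):
    url = 'http://%s:%s' % (host, port)
    for _ in range(attempts):
        try:
            if urllib2.urlopen(url, timeout=10).getcode() == 200:
                return True
        except Exception:
            pass  # not up yet; retry after a short sleep
        time.sleep(delay)
    return False

# Example: wait_for_history_server('example-host', 18080)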

+ 26 - 3
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/setup_spark.py

@@ -65,15 +65,38 @@ def setup_spark(env):
       content=InlineTemplate(params.spark_metrics_properties)
  )

+  File(os.path.join(params.spark_conf, 'java-opts'),
+       owner=params.spark_user,
+       group=params.spark_group,
+       content=params.spark_javaopts_properties
+  )
+
  if params.is_hive_installed:
+    hive_config = get_hive_config()
     XmlConfig("hive-site.xml",
     XmlConfig("hive-site.xml",
               conf_dir=params.spark_conf,
               conf_dir=params.spark_conf,
-              configurations=params.config['configurations']['hive-site'],
-              configuration_attributes=params.config['configuration_attributes']['hive-site'],
+              configurations=hive_config,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0644)

+def get_hive_config():
+  import params
+  hive_conf_dict = dict()
+  hive_conf_dict['hive.metastore.uris'] = params.config['configurations']['hive-site']['hive.metastore.uris']
+  if params.security_enabled:
+    hive_conf_dict['hive.metastore.sasl.enabled'] =  str(params.config['configurations']['hive-site']['hive.metastore.sasl.enabled']).lower()
+    hive_conf_dict['hive.metastore.kerberos.keytab.file'] = params.config['configurations']['hive-site']['hive.metastore.kerberos.keytab.file']
+    hive_conf_dict['hive.server2.authentication.spnego.principal'] =  params.config['configurations']['hive-site']['hive.server2.authentication.spnego.principal']
+    hive_conf_dict['hive.server2.authentication.spnego.keytab'] = params.config['configurations']['hive-site']['hive.server2.authentication.spnego.keytab']
+    hive_conf_dict['hive.metastore.kerberos.principal'] = params.config['configurations']['hive-site']['hive.metastore.kerberos.principal']
+    hive_conf_dict['hive.server2.authentication.kerberos.principal'] = params.config['configurations']['hive-site']['hive.server2.authentication.kerberos.principal']
+    hive_conf_dict['hive.server2.authentication.kerberos.keytab'] =  params.config['configurations']['hive-site']['hive.server2.authentication.kerberos.keytab']
+    hive_conf_dict['hive.security.authorization.enabled']=  str(params.config['configurations']['hive-site']['hive.security.authorization.enabled']).lower()
+    hive_conf_dict['hive.server2.enable.doAs'] =  str(params.config['configurations']['hive-site']['hive.server2.enable.doAs']).lower()
+
+  return hive_conf_dict
+

 def spark_properties(params):
   spark_dict = dict()
@@ -166,4 +189,4 @@ def get_hdp_version():
  if match is None:
    raise Fail('Failed to get extracted version')

-  return hdp_version
+  return hdp_version
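Note (not part of the commit): instead of copying the whole hive-site into Spark's conf dir, the new get_hive_config() picks out only the metastore URI plus, on secure clusters, the Kerberos/SASL properties, lower-casing the boolean-valued ones. A hypothetical stand-alone version of that whitelist filter, using the same keys the commit selects:

def filter_hive_site(hive_site, security_enabled):
    # Keys taken from the commit's get_hive_config(); function itself is illustrative.
    keys = ['hive.metastore.uris']
    bool_keys = []
    if security_enabled:
        keys += ['hive.metastore.sasl.enabled',
                 'hive.metastore.kerberos.keytab.file',
                 'hive.metastore.kerberos.principal',
                 'hive.server2.authentication.spnego.principal',
                 'hive.server2.authentication.spnego.keytab',
                 'hive.server2.authentication.kerberos.principal',
                 'hive.server2.authentication.kerberos.keytab',
                 'hive.security.authorization.enabled',
                 'hive.server2.enable.doAs']
        bool_keys = ['hive.metastore.sasl.enabled',
                     'hive.security.authorization.enabled',
                     'hive.server2.enable.doAs']
    out = {}
    for key in keys:
        if key in hive_site:
            out[key] = str(hive_site[key]).lower() if key in bool_keys else hive_site[key]
    return out

print(filter_hive_site({'hive.metastore.uris': 'thrift://example-host:9083'}, False))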

+ 1 - 1
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/spark_client.py

@@ -34,7 +34,7 @@ class SparkClient(Script):
    import params

    env.set_params(params)
-    if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.1.0') >= 0:
+    if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') >= 0:
       Execute(format("hdp-select set spark-client {version}"))
       Execute(format("hdp-select set spark-client {version}"))
 
 
   def install(self, env):
   def install(self, env):

+ 1 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/role_command_order.json

@@ -5,6 +5,7 @@
     "_comment" : "dependencies for all cases",
     "_comment" : "dependencies for all cases",
     "RANGER_SERVICE_CHECK-SERVICE_CHECK" : ["RANGER_ADMIN-START"],
     "RANGER_SERVICE_CHECK-SERVICE_CHECK" : ["RANGER_ADMIN-START"],
     "RANGER_SERVICE_CHECK-SERVICE_CHECK" : ["RANGER_USERSYNC-START"],
     "RANGER_SERVICE_CHECK-SERVICE_CHECK" : ["RANGER_USERSYNC-START"],
+	"SPARK_SERVICE_CHECK-SERVICE_CHECK" : ["SPARK_JOBHISTORYSERVER-START"],
     "FALCON_SERVER-START": ["NAMENODE-START", "DATANODE-START", "OOZIE_SERVER-START"],
     "FALCON_SERVER-START": ["NAMENODE-START", "DATANODE-START", "OOZIE_SERVER-START"],
     "WEBHCAT_SERVICE_CHECK-SERVICE_CHECK": ["WEBHCAT_SERVER-START"],
     "WEBHCAT_SERVICE_CHECK-SERVICE_CHECK": ["WEBHCAT_SERVER-START"],
     "FLUME_SERVICE_CHECK-SERVICE_CHECK": ["FLUME_HANDLER-START"],
     "FLUME_SERVICE_CHECK-SERVICE_CHECK": ["FLUME_HANDLER-START"],