
AMBARI-9494. Support Spark installation via Ambari (Gautam Borad via alejandro)

Alejandro Fernandez 10 years ago
parent
commit
43352a755a

+ 32 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/alerts.json

@@ -0,0 +1,32 @@
+{
+  "SPARK": {
+    "service": [],
+    "SPARK_JOBHISTORYSERVER": [
+      {
+        "name": "SPARK_JOBHISTORYSERVER_PROCESS",
+        "label": "Spark History Server",
+        "description": "This host-level alert is triggered if the Spark History Server cannot be determined to be up.",
+        "interval": 1,
+        "scope": "HOST",
+        "source": {
+          "type": "PORT",
+          "uri": "{{spark-defaults/spark.history.ui.port}}",
+          "default_port": 18080,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.3f}s response on port {1}"
+            },
+            "warning": {
+              "text": "TCP OK - {0:.3f}s response on port {1}",
+              "value": 1.5
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}",
+              "value": 5
+            }
+          }
+        }
+      }
+    ]
+  }
+}

+ 89 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/configuration/spark-defaults.xml

@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration supports_final="true">
+
+  <property>
+    <name>spark.yarn.executor.memoryOverhead</name>
+    <value>384</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.driver.memoryOverhead</name>
+    <value>384</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.applicationMaster.waitTries</name>
+    <value>10</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.scheduler.heartbeat.interval-ms</name>
+    <value>5000</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.max.executor.failures</name>
+    <value>3</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.queue</name>
+    <value>default</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.containerLauncherMaxThreads</name>
+    <value>25</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.submit.file.replication</name>
+    <value>3</value>
+  </property>
+
+  <property>
+    <name>spark.yarn.preserve.staging.files</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>spark.history.provider</name>
+    <value>org.apache.spark.deploy.yarn.history.YarnHistoryProvider</value>
+  </property>
+
+  <property>
+    <name>spark.history.ui.port</name>
+    <value>18080</value>
+  </property>
+
+  <property>
+    <name>spark.driver.extraJavaOptions</name>
+    <value></value>
+  </property>
+
+  <property>
+    <name>spark.yarn.am.extraJavaOptions</name>
+    <value></value>
+  </property>
+
+</configuration>

+ 92 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/configuration/spark-env.xml

@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+  <property>
+    <name>spark_user</name>
+    <value>spark</value>
+    <property-type>USER</property-type>
+  </property>
+
+  <property>
+    <name>spark_group</name>
+    <value>spark</value>
+    <property-type>GROUP</property-type>
+    <description>spark group</description>
+  </property>
+
+  <property>
+    <name>spark_log_dir</name>
+    <value>/var/log/spark</value>
+    <description>Spark Log Dir</description>
+  </property>
+
+  <property>
+    <name>spark_pid_dir</name>
+    <value>/var/run/spark</value>
+  </property>
+
+  <!-- spark-env.sh -->
+  <property>
+    <name>content</name>
+    <description>Jinja template for the spark-env.sh file</description>
+    <value>
+#!/usr/bin/env bash
+
+# This file is sourced when running various Spark programs.
+# Copy it as spark-env.sh and edit that to configure Spark for your site.
+
+# Options read in YARN client mode
+#SPARK_EXECUTOR_INSTANCES="2" #Number of workers to start (Default: 2)
+#SPARK_EXECUTOR_CORES="1" #Number of cores for the workers (Default: 1).
+#SPARK_EXECUTOR_MEMORY="1G" #Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
+#SPARK_DRIVER_MEMORY="512 Mb" #Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
+#SPARK_YARN_APP_NAME="spark" #The name of your application (Default: Spark)
+#SPARK_YARN_QUEUE="~@~Xdefault~@~Y" #The hadoop queue to use for allocation requests (Default: @~Xdefault~@~Y)
+#SPARK_YARN_DIST_FILES="" #Comma separated list of files to be distributed with the job.
+#SPARK_YARN_DIST_ARCHIVES="" #Comma separated list of archives to be distributed with the job.
+
+# Generic options for the daemons used in the standalone deploy mode
+
+# Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+export SPARK_CONF_DIR=${SPARK_HOME:-{{spark_home}}}/conf
+
+# Where log files are stored.(Default:${SPARK_HOME}/logs)
+#export SPARK_LOG_DIR=${SPARK_HOME:-{{spark_home}}}/logs
+export SPARK_LOG_DIR={{spark_log_dir}}
+
+# Where the pid file is stored. (Default: /tmp)
+export SPARK_PID_DIR={{spark_pid_dir}}
+
+# A string representing this instance of spark.(Default: $USER)
+SPARK_IDENT_STRING=$USER
+
+# The scheduling priority for daemons. (Default: 0)
+SPARK_NICENESS=0
+
+export HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-{{hadoop_conf_dir}}}
+
+    </value>
+  </property>
+
+</configuration>

+ 42 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/configuration/spark-log4j-properties.xml

@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration supports_final="false">
+  <property>
+    <name>content</name>
+    <description>Spark-log4j-Properties</description>
+    <value>
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.eclipse.jetty=WARN
+log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+
+    </value>
+  </property>
+</configuration>

+ 160 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/configuration/spark-metrics-properties.xml

@@ -0,0 +1,160 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<configuration supports_final="true">
+  <property>
+    <name>content</name>
+    <description>Spark-metrics-properties</description>
+    <value>
+# syntax: [instance].sink|source.[name].[options]=[value]
+
+# This file configures Spark's internal metrics system. The metrics system is
+# divided into instances which correspond to internal components.
+# Each instance can be configured to report its metrics to one or more sinks.
+# Accepted values for [instance] are "master", "worker", "executor", "driver",
+# and "applications". A wild card "*" can be used as an instance name, in
+# which case all instances will inherit the supplied property.
+#
+# Within an instance, a "source" specifies a particular set of grouped metrics.
+# there are two kinds of sources:
+# 1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
+# collect a Spark component's internal state. Each instance is paired with a
+# Spark source that is added automatically.
+# 2. Common sources, like JvmSource, which will collect low level state.
+# These can be added through configuration options and are then loaded
+# using reflection.
+#
+# A "sink" specifies where metrics are delivered to. Each instance can be
+# assigned one or more sinks.
+#
+# The sink|source field specifies whether the property relates to a sink or
+# source.
+#
+# The [name] field specifies the name of source or sink.
+#
+# The [options] field is the specific property of this source or sink. The
+# source or sink is responsible for parsing this property.
+#
+# Notes:
+# 1. To add a new sink, set the "class" option to a fully qualified class
+# name (see examples below).
+# 2. Some sinks involve a polling period. The minimum allowed polling period
+# is 1 second.
+# 3. Wild card properties can be overridden by more specific properties.
+# For example, master.sink.console.period takes precedence over
+# *.sink.console.period.
+# 4. A metrics specific configuration
+# "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
+# added to Java properties using -Dspark.metrics.conf=xxx if you want to
+# customize metrics system. You can also put the file in ${SPARK_HOME}/conf
+# and it will be loaded automatically.
+# 5. MetricsServlet is added by default as a sink in master, worker and client
+# driver, you can send http request "/metrics/json" to get a snapshot of all the
+# registered metrics in json format. For master, requests "/metrics/master/json" and
+# "/metrics/applications/json" can be sent seperately to get metrics snapshot of
+# instance master and applications. MetricsServlet may not be configured by self.
+#
+
+## List of available sinks and their properties.
+
+# org.apache.spark.metrics.sink.ConsoleSink
+# Name: Default: Description:
+# period 10 Poll period
+# unit seconds Units of poll period
+
+# org.apache.spark.metrics.sink.CSVSink
+# Name: Default: Description:
+# period 10 Poll period
+# unit seconds Units of poll period
+# directory /tmp Where to store CSV files
+
+# org.apache.spark.metrics.sink.GangliaSink
+# Name: Default: Description:
+# host NONE Hostname or multicast group of Ganglia server
+# port NONE Port of Ganglia server(s)
+# period 10 Poll period
+# unit seconds Units of poll period
+# ttl 1 TTL of messages sent by Ganglia
+# mode multicast Ganglia network mode ('unicast' or 'multicast')
+
+# org.apache.spark.metrics.sink.JmxSink
+
+# org.apache.spark.metrics.sink.MetricsServlet
+# Name: Default: Description:
+# path VARIES* Path prefix from the web server root
+# sample false Whether to show entire set of samples for histograms ('false' or 'true')
+#
+# * Default path is /metrics/json for all instances except the master. The master has two paths:
+# /metrics/applications/json # App information
+# /metrics/master/json # Master information
+
+# org.apache.spark.metrics.sink.GraphiteSink
+# Name: Default: Description:
+# host NONE Hostname of Graphite server
+# port NONE Port of Graphite server
+# period 10 Poll period
+# unit seconds Units of poll period
+# prefix EMPTY STRING Prefix to prepend to metric name
+
+## Examples
+# Enable JmxSink for all instances by class name
+#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
+
+# Enable ConsoleSink for all instances by class name
+#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
+
+# Polling period for ConsoleSink
+#*.sink.console.period=10
+
+#*.sink.console.unit=seconds
+
+# Master instance overlap polling period
+#master.sink.console.period=15
+
+#master.sink.console.unit=seconds
+
+# Enable CsvSink for all instances
+#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
+
+# Polling period for CsvSink
+#*.sink.csv.period=1
+
+#*.sink.csv.unit=minutes
+
+# Polling directory for CsvSink
+#*.sink.csv.directory=/tmp/
+
+# Worker instance overlap polling period
+#worker.sink.csv.period=10
+
+#worker.sink.csv.unit=minutes
+
+# Enable jvm source for instance master, worker, driver and executor
+#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+    </value>
+  </property>
+</configuration>

+ 86 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/metainfo.xml

@@ -0,0 +1,86 @@
+<?xml version="1.0"?>
+<!--Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+-->
+<metainfo>
+  <schemaVersion>2.0</schemaVersion>
+  <services>
+    <service>
+      <name>SPARK</name>
+      <displayName>Spark</displayName>
+      <comment>Apache Spark is a fast and general engine for large-scale data processing.</comment>
+      <version>1.2.0.2.2</version>
+      <components>
+        <component>
+          <name>SPARK_JOBHISTORYSERVER</name>
+          <displayName>Spark History Server</displayName>
+          <category>MASTER</category>
+          <cardinality>1</cardinality>
+          <commandScript>
+            <script>scripts/job_history_server.py</script>
+            <scriptType>PYTHON</scriptType>
+            <timeout>600</timeout>
+          </commandScript>
+        </component>
+        <component>
+          <name>SPARK_CLIENT</name>
+          <displayName>Spark Client</displayName>
+          <category>CLIENT</category>
+          <cardinality>1+</cardinality>
+          <commandScript>
+            <script>scripts/spark_client.py</script>
+            <scriptType>PYTHON</scriptType>
+            <timeout>600</timeout>
+          </commandScript>
+        </component>
+      </components>
+
+      <osSpecifics>
+        <osSpecific>
+          <osFamily>redhat5,redhat6,suse11,ubuntu12</osFamily>
+          <packages>
+            <package>
+              <name>spark</name>
+            </package>
+            <package>
+              <name>spark-python</name>
+            </package>
+          </packages>
+        </osSpecific>
+      </osSpecifics>
+
+      <configuration-dependencies>
+        <config-type>spark-defaults</config-type>
+        <config-type>spark-env</config-type>
+        <config-type>spark-log4j-properties</config-type>
+        <config-type>spark-metrics-properties</config-type>
+      </configuration-dependencies>
+
+      <commandScript>
+        <script>scripts/service_check.py</script>
+        <scriptType>PYTHON</scriptType>
+        <timeout>300</timeout>
+      </commandScript>
+
+      <requiredServices>
+        <service>YARN</service>
+        <service>TEZ</service>
+      </requiredServices>
+
+    </service>
+  </services>
+</metainfo>

+ 100 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/job_history_server.py

@@ -0,0 +1,100 @@
+#!/usr/bin/python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+import os
+from resource_management import *
+from resource_management.core.exceptions import ComponentIsNotRunning
+from resource_management.core.logger import Logger
+from resource_management.core import shell
+from setup_spark import *
+
+
+class JobHistoryServer(Script):
+
+  def get_stack_to_component(self):
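+    # Maps the HDP stack to the hdp-select component name used for this daemon.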
+     return {"HDP": "spark-historyserver"}
+
+  def pre_rolling_restart(self, env):
+    import params
+
+    env.set_params(params)
+    if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.1.0') >= 0:
+      Execute(format("hdp-select set spark-historyserver {version}"))
+
+  def install(self, env):
+    self.install_packages(env)
+    import params
+
+    env.set_params(params)
+    self.configure(env)
+
+  def stop(self, env, rolling_restart=False):
+    import params
+
+    env.set_params(params)
+    self.configure(env)
+    daemon_cmd = format('{spark_history_server_stop}')
+    Execute(daemon_cmd,
+            user=params.spark_user,
+            environment={'JAVA_HOME': params.java_home}
+    )
+    if os.path.isfile(params.spark_history_server_pid_file):
+      os.remove(params.spark_history_server_pid_file)
+
+
+  def start(self, env, rolling_restart=False):
+    import params
+
+    env.set_params(params)
+    self.configure(env)
+
+    # FIXME! TODO! remove this after soft link bug is fixed:
+    if not os.path.islink('/usr/hdp/current/spark'):
+      hdp_version = get_hdp_version()
+      cmd = 'ln -s /usr/hdp/' + hdp_version + '/spark /usr/hdp/current/spark'
+      Execute(cmd)
+
+    daemon_cmd = format('{spark_history_server_start}')
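+    # Skip the start command when the pid file exists and its process is still running (see not_if below).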
+    no_op_test = format(
+      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
+    Execute(daemon_cmd,
+            user=params.spark_user,
+            environment={'JAVA_HOME': params.java_home},
+            not_if=no_op_test
+    )
+
+  def status(self, env):
+    import status_params
+
+    env.set_params(status_params)
+    pid_file = format("{spark_history_server_pid_file}")
+    # Check the Spark History Server process via its pid file
+    check_process_status(pid_file)
+
+
+  def configure(self, env):
+    import params
+
+    env.set_params(params)
+    setup_spark(env)
+
+if __name__ == "__main__":
+  JobHistoryServer().execute()

+ 118 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/params.py

@@ -0,0 +1,118 @@
+#!/usr/bin/python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management.libraries.functions.version import format_hdp_stack_version, compare_versions
+from resource_management import *
+from setup_spark import *
+import status_params
+
+config = Script.get_config()
+tmp_dir = Script.get_tmp_dir()
+
+# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
+version = default("/commandParams/version", None)
+
+hdp_stack_version = str(config['hostLevelParams']['stack_version'])
+hdp_stack_version = format_hdp_stack_version(hdp_stack_version)
+
+# TODO! FIXME! Version check is not working as of today :
+#   $ yum list installed | grep hdp-select
+#   hdp-select.noarch                            2.2.1.0-2340.el6           @HDP-2.2
+# And hdp_stack_version returned from hostLevelParams/stack_version is : 2.2.0.0
+# Commenting out for time being
+#stack_is_hdp22_or_further = hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2.1.0') >= 0
+
+stack_is_hdp22_or_further = hdp_stack_version != "" and compare_versions(hdp_stack_version, '2.2') >= 0
+if stack_is_hdp22_or_further:
+  hadoop_home = "/usr/hdp/current/hadoop-client"
+  spark_conf = '/etc/spark/conf'
+  spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
+  spark_pid_dir = status_params.spark_pid_dir
+  spark_role_root = "spark-client"
+
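+  # Resolve spark_home from the hdp-select "current" symlink that matches the component this command targets.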
+  command_role = default("/role", "")
+
+  if command_role == "SPARK_CLIENT":
+    spark_role_root = "spark-client"
+  elif command_role == "SPARK_JOBHISTORYSERVER":
+    spark_role_root = "spark-historyserver"
+
+  spark_home = format("/usr/hdp/current/{spark_role_root}")
+else:
+  pass
+
+java_home = config['hostLevelParams']['java_home']
+hadoop_conf_dir = "/etc/hadoop/conf"
+
+spark_user = status_params.spark_user
+spark_group = status_params.spark_group
+user_group = status_params.user_group
+spark_history_server_pid_file = status_params.spark_history_server_pid_file
+
+spark_history_server_start = format("{spark_home}/sbin/start-history-server.sh")
+spark_history_server_stop = format("{spark_home}/sbin/stop-history-server.sh")
+
+spark_submit_cmd = format("{spark_home}/bin/spark-submit")
+spark_smoke_example = "org.apache.spark.examples.SparkPi"
+spark_service_check_cmd = format(
+  "{spark_submit_cmd} --class {spark_smoke_example}  --master yarn-cluster  --num-executors 1 --driver-memory 256m  --executor-memory 256m   --executor-cores 1  {spark_home}/lib/spark-examples*.jar 1")
+
+spark_jobhistoryserver_hosts = default("/clusterHostInfo/spark_jobhistoryserver_hosts", [])
+
+if len(spark_jobhistoryserver_hosts) > 0:
+  spark_history_server_host = spark_jobhistoryserver_hosts[0]
+else:
+  spark_history_server_host = "localhost"
+
+# spark-defaults params
+spark_yarn_historyServer_address = spark_history_server_host
+spark_yarn_applicationMaster_waitTries = default(
+  "/configurations/spark-defaults/spark.yarn.applicationMaster.waitTries", '10')
+spark_yarn_submit_file_replication = default("/configurations/spark-defaults/spark.yarn.submit.file.replication", '3')
+spark_yarn_preserve_staging_files = default("/configurations/spark-defaults/spark.yarn.preserve.staging.files", "false")
+spark_yarn_scheduler_heartbeat_interval = default(
+  "/configurations/spark-defaults/spark.yarn.scheduler.heartbeat.interval-ms", "5000")
+spark_yarn_queue = default("/configurations/spark-defaults/spark.yarn.queue", "default")
+spark_yarn_containerLauncherMaxThreads = default(
+  "/configurations/spark-defaults/spark.yarn.containerLauncherMaxThreads", "25")
+spark_yarn_max_executor_failures = default("/configurations/spark-defaults/spark.yarn.max.executor.failures", "3")
+spark_yarn_executor_memoryOverhead = default("/configurations/spark-defaults/spark.yarn.executor.memoryOverhead", "384")
+spark_yarn_driver_memoryOverhead = default("/configurations/spark-defaults/spark.yarn.driver.memoryOverhead", "384")
+spark_history_provider = default("/configurations/spark-defaults/spark.history.provider",
+                                 "org.apache.spark.deploy.yarn.history.YarnHistoryProvider")
+spark_history_ui_port = default("/configurations/spark-defaults/spark.history.ui.port", "18080")
+
+spark_env_sh = config['configurations']['spark-env']['content']
+spark_log4j_properties = config['configurations']['spark-log4j-properties']['content']
+spark_metrics_properties = config['configurations']['spark-metrics-properties']['content']
+
+hive_server_host = default("/clusterHostInfo/hive_server_host", [])
+is_hive_installed = not len(hive_server_host) == 0
+
+hdp_full_version = get_hdp_version()
+
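+# Append -Dhdp.version to the driver and application master Java options when it is not already present.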
+spark_driver_extraJavaOptions = str(config['configurations']['spark-defaults']['spark.driver.extraJavaOptions'])
+if spark_driver_extraJavaOptions.find('-Dhdp.version') == -1:
+  spark_driver_extraJavaOptions = spark_driver_extraJavaOptions + ' -Dhdp.version=' + str(hdp_full_version)
+
+spark_yarn_am_extraJavaOptions = str(config['configurations']['spark-defaults']['spark.yarn.am.extraJavaOptions'])
+if spark_yarn_am_extraJavaOptions.find('-Dhdp.version') == -1:
+  spark_yarn_am_extraJavaOptions = spark_yarn_am_extraJavaOptions + ' -Dhdp.version=' + str(hdp_full_version)

+ 53 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/service_check.py

@@ -0,0 +1,53 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from resource_management import *
+from resource_management.core.exceptions import ComponentIsNotRunning
+from resource_management.core.logger import Logger
+from resource_management.core import shell
+
+
+class SparkServiceCheck(Script):
+  def service_check(self, env):
+    import params
+
+    env.set_params(params)
+    self.check_spark_job_history_server()
+    # self.check_spark_client()
+
+  def check_spark_job_history_server(self):
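+    # Look for the HistoryServer JVM in the process list; raise ComponentIsNotRunning if it is absent.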
+    cmd = 'ps -ef | grep org.apache.spark.deploy.history.HistoryServer | grep -v grep'
+    code, output = shell.call(cmd, timeout=100)
+    if code == 0:
+      Logger.info('Spark job History Server up and running')
+    else:
+      Logger.debug('Spark job History Server not running')
+      raise ComponentIsNotRunning()
+
+  pass
+
+  # def check_spark_client(self):
+  # import params
+  #   smoke_cmd = params.spark_service_check_cmd
+  #   code, output = shell.call(smoke_cmd, timeout=100)
+  #   if code == 0:
+  #     Logger.info('Spark on Yarn Job can be submitted')
+  #   else:
+  #     Logger.debug('Spark on Yarn Job cannot be submitted')
+  #     raise ComponentIsNotRunning()
+  # pass
+
+if __name__ == "__main__":
+  SparkServiceCheck().execute()

+ 169 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/setup_spark.py

@@ -0,0 +1,169 @@
+#!/usr/bin/python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+import re
+import fileinput
+import shutil
+import os
+from resource_management import *
+from resource_management.core.exceptions import ComponentIsNotRunning
+from resource_management.core.logger import Logger
+from resource_management.core import shell
+
+
+def setup_spark(env):
+  import params
+
+  env.set_params(params)
+
+  Directory([params.spark_pid_dir, params.spark_log_dir],
+            owner=params.spark_user,
+            group=params.user_group,
+            recursive=True
+  )
+
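+  # Generate spark-defaults.conf from the spark-defaults configuration values.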
+  file_path = params.spark_conf + '/spark-defaults.conf'
+  create_file(file_path)
+
+  write_properties_to_file(file_path, spark_properties(params))
+
+  # create spark-env.sh in etc/conf dir
+  File(os.path.join(params.spark_conf, 'spark-env.sh'),
+       owner=params.spark_user,
+       group=params.spark_group,
+       content=InlineTemplate(params.spark_env_sh)
+  )
+
+  #create log4j.properties in etc/conf dir
+  File(os.path.join(params.spark_conf, 'log4j.properties'),
+       owner=params.spark_user,
+       group=params.spark_group,
+       content=params.spark_log4j_properties
+  )
+
+  #create metrics.properties in etc/conf dir
+  File(os.path.join(params.spark_conf, 'metrics.properties'),
+       owner=params.spark_user,
+       group=params.spark_group,
+       content=InlineTemplate(params.spark_metrics_properties)
+  )
+
+  if params.is_hive_installed:
+    XmlConfig("hive-site.xml",
+              conf_dir=params.spark_conf,
+              configurations=params.config['configurations']['hive-site'],
+              configuration_attributes=params.config['configuration_attributes']['hive-site'],
+              owner=params.spark_user,
+              group=params.spark_group,
+              mode=0644)
+
+
+def spark_properties(params):
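+  """Collect the key/value pairs that are written into spark-defaults.conf."""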
+  spark_dict = dict()
+
+  spark_dict['spark.yarn.executor.memoryOverhead'] = params.spark_yarn_executor_memoryOverhead
+  spark_dict['spark.yarn.driver.memoryOverhead'] = params.spark_yarn_driver_memoryOverhead
+  spark_dict['spark.yarn.applicationMaster.waitTries'] = params.spark_yarn_applicationMaster_waitTries
+  spark_dict['spark.yarn.scheduler.heartbeat.interval-ms'] = params.spark_yarn_scheduler_heartbeat_interval
+  spark_dict['spark.yarn.max.executor.failures'] = params.spark_yarn_max_executor_failures
+  spark_dict['spark.yarn.queue'] = params.spark_yarn_queue
+  spark_dict['spark.yarn.containerLauncherMaxThreads'] = params.spark_yarn_containerLauncherMaxThreads
+  spark_dict['spark.yarn.submit.file.replication'] = params.spark_yarn_submit_file_replication
+  spark_dict['spark.yarn.preserve.staging.files'] = params.spark_yarn_preserve_staging_files
+
+  # Hardcoded parameters to be added to spark-defaults.conf
+  spark_dict['spark.yarn.historyServer.address'] = params.spark_history_server_host + ':' + str(
+    params.spark_history_ui_port)
+  spark_dict['spark.yarn.services'] = 'org.apache.spark.deploy.yarn.history.YarnHistoryService'
+  spark_dict['spark.history.provider'] = 'org.apache.spark.deploy.yarn.history.YarnHistoryProvider'
+  spark_dict['spark.history.ui.port'] = params.spark_history_ui_port
+
+  spark_dict['spark.driver.extraJavaOptions'] = params.spark_driver_extraJavaOptions
+  spark_dict['spark.yarn.am.extraJavaOptions'] = params.spark_yarn_am_extraJavaOptions
+
+  return spark_dict
+
+
+def write_properties_to_file(file_path, value):
+  for key in value:
+    modify_config(file_path, key, value[key])
+
+
+def modify_config(filepath, variable, setting):
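+  """Rewrite an existing 'variable=value' line in filepath with the new setting, or append 'variable setting' if the variable is not present."""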
+  var_found = False
+  already_set = False
+  V = str(variable)
+  S = str(setting)
+
+
+  for line in fileinput.input(filepath, inplace=1):
+    if not line.lstrip(' ').startswith('#') and '=' in line:
+      _infile_var = str(line.split('=')[0].rstrip(' '))
+      _infile_set = str(line.split('=')[1].lstrip(' ').rstrip())
+      if not var_found and _infile_var.rstrip(' ') == V:
+        var_found = True
+        if _infile_set.lstrip(' ') == S:
+          already_set = True
+        else:
+          line = "%s %s\n" % (V, S)
+
+    sys.stdout.write(line)
+
+  if not var_found:
+    with open(filepath, "a") as f:
+      f.write("%s \t %s\n" % (V, S))
+
+  return
+
+
+def create_file(file_path):
+  try:
+    open(file_path, 'w').close()
+  except IOError:
+    raise Fail('Unable to create file: ' + file_path)
+
+
+def get_hdp_version():
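+  """Return the full HDP version string (e.g. 2.2.1.0-2340) reported by hdp-select for hadoop-client."""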
+  try:
+    command = 'hdp-select status hadoop-client'
+    return_code, hdp_output = shell.call(command, timeout=20)
+  except Exception, e:
+    Logger.error(str(e))
+    raise Fail('Unable to execute hdp-select command to retrieve the version.')
+
+  if return_code != 0:
+    raise Fail(
+      'Unable to determine the current version because of a non-zero return code of {0}'.format(str(return_code)))
+
+  hdp_version = re.sub('hadoop-client - ', '', hdp_output)
+  match = re.match('[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+-[0-9]+', hdp_version)
+
+  if match is None:
+    raise Fail('Failed to get extracted version')
+
+  return hdp_version

+ 56 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/spark_client.py

@@ -0,0 +1,56 @@
+#!/usr/bin/python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import sys
+from resource_management import *
+from resource_management.core.exceptions import ComponentIsNotRunning
+from resource_management.core.logger import Logger
+from resource_management.core import shell
+from setup_spark import setup_spark
+
+
+class SparkClient(Script):
+  def get_stack_to_component(self):
+    return {"HDP": "spark-client"}
+
+  def pre_rolling_restart(self, env):
+    import params
+
+    env.set_params(params)
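+    # During a rolling upgrade, point the spark-client hdp-select symlink at the new version.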
+    if params.version and compare_versions(format_hdp_stack_version(params.version), '2.2.1.0') >= 0:
+      Execute(format("hdp-select set spark-client {version}"))
+
+  def install(self, env):
+    self.install_packages(env)
+    self.configure(env)
+
+  def configure(self, env):
+    import params
+
+    env.set_params(params)
+    setup_spark(env)
+
+  def status(self, env):
+    raise ClientComponentHasNoStatus()
+
+
+if __name__ == "__main__":
+  SparkClient().execute()
+

+ 30 - 0
ambari-server/src/main/resources/common-services/SPARK/1.2.0.2.2/package/scripts/status_params.py

@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+from resource_management import *
+
+config = Script.get_config()
+
+spark_user = config['configurations']['spark-env']['spark_user']
+spark_group = config['configurations']['spark-env']['spark_group']
+user_group = config['configurations']['cluster-env']['user_group']
+
+spark_pid_dir = config['configurations']['spark-env']['spark_pid_dir']
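+# Matches the pid file name written by Spark's sbin/spark-daemon.sh for the HistoryServer instance.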
+spark_history_server_pid_file = format("{spark_pid_dir}/spark-{spark_user}-org.apache.spark.deploy.history.HistoryServer-1.pid")

+ 29 - 0
ambari-server/src/main/resources/stacks/HDP/2.2/services/SPARK/metainfo.xml

@@ -0,0 +1,29 @@
+<?xml version="1.0"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<metainfo>
+    <schemaVersion>2.0</schemaVersion>
+    <services>
+        <service>
+            <name>SPARK</name>
+            <extends>common-services/SPARK/1.2.0.2.2</extends>
+        </service>
+    </services>
+</metainfo>