AMBARI-9994. Add Mahout to HDP Stack. (vbrodetskyi)

Vitaly Brodetskyi 10 years ago
parent commit ea8e32ff1e

+ 2 - 0
ambari-server/src/main/java/org/apache/ambari/server/Role.java

@@ -80,6 +80,8 @@ public class Role {
   public static final Role PEERSTATUS = valueOf("PEERSTATUS");
   public static final Role PIG = valueOf("PIG");
   public static final Role PIG_SERVICE_CHECK = valueOf("PIG_SERVICE_CHECK");
+  public static final Role MAHOUT = valueOf("MAHOUT");
+  public static final Role MAHOUT_SERVICE_CHECK = valueOf("MAHOUT_SERVICE_CHECK");
   public static final Role RESOURCEMANAGER = valueOf("RESOURCEMANAGER");
   public static final Role SECONDARY_NAMENODE = valueOf("SECONDARY_NAMENODE");
   public static final Role SQOOP = valueOf("SQOOP");

+ 1 - 0
ambari-server/src/main/java/org/apache/ambari/server/metadata/ActionMetadata.java

@@ -68,6 +68,7 @@ public class ActionMetadata {
     serviceClients.put("hcat"       , Role.HCAT.toString());
     serviceClients.put("oozie"      , Role.OOZIE_CLIENT.toString());
     serviceClients.put("pig"        , Role.PIG.toString());
+    serviceClients.put("mahout"     , Role.MAHOUT.toString());
     serviceClients.put("sqoop"      , Role.SQOOP.toString());
     serviceClients.put("yarn"       , Role.YARN_CLIENT.toString());
     serviceClients.put("kerberos"   , Role.KERBEROS_CLIENT.toString());

+ 30 - 0
ambari-server/src/main/resources/common-services/MAHOUT/1.0.0.2.3/configuration/mahout-env.xml

@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+
+<configuration>
+    <property>
+        <name>mahout_user</name>
+        <value>mahout</value>
+        <property-type>USER</property-type>
+        <description>Mahout user</description>
+    </property>
+</configuration>

+ 68 - 0
ambari-server/src/main/resources/common-services/MAHOUT/1.0.0.2.3/metainfo.xml

@@ -0,0 +1,68 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<metainfo>
+    <schemaVersion>2.0</schemaVersion>
+    <services>
+        <service>
+            <name>MAHOUT</name>
+            <displayName>Mahout</displayName>
+            <comment>Project of the Apache Software Foundation to produce free implementations of distributed or
+                otherwise scalable machine learning algorithms focused primarily in the areas of collaborative
+                filtering, clustering and classification</comment>
+            <version>1.0.0.2.3</version>
+            <components>
+                <component>
+                    <name>MAHOUT</name>
+                    <displayName>Mahout</displayName>
+                    <category>CLIENT</category>
+                    <cardinality>0+</cardinality>
+                    <versionAdvertised>true</versionAdvertised>
+                    <commandScript>
+                        <script>scripts/mahout_client.py</script>
+                        <scriptType>PYTHON</scriptType>
+                        <timeout>1200</timeout>
+                    </commandScript>
+                </component>
+            </components>
+            <osSpecifics>
+                <osSpecific>
+                    <osFamily>any</osFamily>
+                    <packages>
+                        <package>
+                            <name>mahout</name>
+                        </package>
+                    </packages>
+                </osSpecific>
+            </osSpecifics>
+
+            <commandScript>
+                <script>scripts/service_check.py</script>
+                <scriptType>PYTHON</scriptType>
+                <timeout>300</timeout>
+            </commandScript>
+
+            <requiredServices>
+                <service>YARN</service>
+            </requiredServices>
+
+            <configuration-dependencies>
+            </configuration-dependencies>
+
+        </service>
+    </services>
+</metainfo>

+ 34 - 0
ambari-server/src/main/resources/common-services/MAHOUT/1.0.0.2.3/package/scripts/mahout.py

@@ -0,0 +1,34 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+import os
+
+from resource_management import *
+
+def mahout():
+  import params
+
+  Directory( params.mahout_conf_dir,
+             recursive = True,
+             owner = params.mahout_user,
+             group = params.user_group
+  )
+
+  # TODO add configs creation
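
A note on the "# TODO add configs creation" left above: config rendering is deferred to a later patch. Purely as an illustrative sketch (none of this is in the commit), the missing piece could follow the pattern other client services use, rendering an env template into the conf dir via the stock resource_management resources. The mahout_env_sh_template parameter and the mahout-env.sh file name below are hypothetical.

import os
from resource_management import *

def mahout_configs():
  import params

  # Hypothetical sketch only: write a rendered mahout-env template (not defined
  # in this commit) into the conf dir created by mahout() above.
  File(os.path.join(params.mahout_conf_dir, "mahout-env.sh"),
       owner = params.mahout_user,
       group = params.user_group,
       content = InlineTemplate(params.mahout_env_sh_template)
  )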

+ 52 - 0
ambari-server/src/main/resources/common-services/MAHOUT/1.0.0.2.3/package/scripts/mahout_client.py

@@ -0,0 +1,52 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+import sys
+from resource_management import *
+from mahout import mahout
+
+
+class MahoutClient(Script):
+
+  def get_stack_to_component(self):
+    return {"HDP": "mahout"}
+
+  def pre_rolling_restart(self, env):
+    import params
+    env.set_params(params)
+
+    Execute(('hdp-select', 'set', 'mahout', params.version),
+            sudo = True)
+
+  def install(self, env):
+    self.install_packages(env)
+    self.configure(env)
+
+  def configure(self, env):
+    import params
+    env.set_params(params)
+    mahout()
+
+  def status(self, env):
+    raise ClientComponentHasNoStatus()
+
+if __name__ == "__main__":
+  MahoutClient().execute()

+ 71 - 0
ambari-server/src/main/resources/common-services/MAHOUT/1.0.0.2.3/package/scripts/params.py

@@ -0,0 +1,71 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management.libraries.functions.version import format_hdp_stack_version, compare_versions
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+tmp_dir = Script.get_tmp_dir()
+
+stack_name = default("/hostLevelParams/stack_name", None)
+
+stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
+hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)
+
+# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
+version = default("/commandParams/version", None)
+
+#mahout params
+mahout_conf_dir = "/etc/mahout/conf"
+mahout_home = "/usr/hdp/current/mahout-client"
+mahout_user = config['configurations']['mahout-env']['mahout_user']
+
+#hadoop params
+hadoop_bin_dir = "/usr/hdp/current/hadoop-client/bin"
+hadoop_home = '/usr/hdp/current/hadoop-client'
+
+hadoop_conf_dir = "/etc/hadoop/conf"
+hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
+hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
+hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
+smokeuser = config['configurations']['cluster-env']['smokeuser']
+smokeuser_principal = config['configurations']['cluster-env']['smokeuser_principal_name']
+user_group = config['configurations']['cluster-env']['user_group']
+security_enabled = config['configurations']['cluster-env']['security_enabled']
+smoke_user_keytab = config['configurations']['cluster-env']['smokeuser_keytab']
+kinit_path_local = functions.get_kinit_path()
+
+# not supporting 32 bit jdk.
+java64_home = config['hostLevelParams']['java_home']
+
+import functools
+#create partial functions with common arguments for every HdfsDirectory call
+#to create hdfs directory we need to call params.HdfsDirectory in code
+HdfsDirectory = functools.partial(
+  HdfsDirectory,
+  conf_dir=hadoop_conf_dir,
+  hdfs_user=hdfs_user,
+  security_enabled = security_enabled,
+  keytab = hdfs_user_keytab,
+  kinit_path_local = kinit_path_local,
+  bin_dir = hadoop_bin_dir
+)
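
The functools.partial above pre-binds the HDFS and Kerberos plumbing (conf dir, hdfs user, keytab, kinit path, bin dir), so later call sites only pass the per-directory arguments. No such call exists in this patch; a hypothetical usage would look like:

import params

# Hypothetical: create a Mahout user directory in HDFS. The keytab/conf_dir/
# bin_dir arguments are already bound by the partial defined in params.py.
params.HdfsDirectory("/user/mahout",
                     action = "create",
                     owner = params.mahout_user,
                     mode = 0775
)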

+ 93 - 0
ambari-server/src/main/resources/common-services/MAHOUT/1.0.0.2.3/package/scripts/service_check.py

@@ -0,0 +1,93 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management import *
+from resource_management.libraries.functions.dynamic_variable_interpretation import copy_tarballs_to_hdfs
+
+class MahoutServiceCheck(Script):
+  def service_check(self, env):
+    import params
+    env.set_params(params)
+
+    create_input_dir_cmd = format("fs -mkdir /user/{smokeuser}/mahoutsmokeinput")
+    copy_file_to_hdfs_cmd = format("fs -put {tmp_dir}/sample-mahout-test.txt /user/{smokeuser}/mahoutsmokeinput/")
+    mahout_command = format("mahout seqdirectory --input /user/{smokeuser}/mahoutsmokeinput/sample-mahout-test.txt "
+                            "--output /user/{smokeuser}/mahoutsmokeoutput/ --charset utf-8")
+    test_command = format("fs -test -e /user/{smokeuser}/mahoutsmokeoutput/_SUCCESS")
+    remove_output_input_dirs_cmd = format("fs -rm -r -f /user/{smokeuser}/mahoutsmokeoutput "
+                                          "/user/{smokeuser}/mahoutsmokeinput")
+
+    ExecuteHadoop( remove_output_input_dirs_cmd,
+                   tries = 3,
+                   try_sleep = 5,
+                   user = params.smokeuser,
+                   conf_dir = params.hadoop_conf_dir,
+                   # for kinit run
+                   keytab = params.smoke_user_keytab,
+                   principal = params.smokeuser_principal,
+                   security_enabled = params.security_enabled,
+                   kinit_path_local = params.kinit_path_local,
+                   bin_dir = params.hadoop_bin_dir
+                   )
+
+    ExecuteHadoop( create_input_dir_cmd,
+                 tries = 3,
+                 try_sleep = 5,
+                 user = params.smokeuser,
+                 conf_dir = params.hadoop_conf_dir,
+                 bin_dir = params.hadoop_bin_dir
+    )
+
+    File( format("{tmp_dir}/sample-mahout-test.txt"),
+        content = "Test text which will be converted to sequence file.",
+        mode = 0755
+    )
+
+    ExecuteHadoop( copy_file_to_hdfs_cmd,
+                   tries = 3,
+                   try_sleep = 5,
+                   user = params.smokeuser,
+                   conf_dir = params.hadoop_conf_dir,
+                   bin_dir = params.hadoop_bin_dir
+    )
+
+    Execute( mahout_command,
+             tries = 3,
+             try_sleep = 5,
+             environment={'HADOOP_HOME': params.hadoop_home,'HADOOP_CONF_DIR': params.hadoop_conf_dir,
+                          'MAHOUT_HOME': params.mahout_home,'JAVA_HOME': params.java64_home},
+             path = format('/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
+             user = params.smokeuser
+    )
+
+    ExecuteHadoop( test_command,
+                   tries = 10,
+                   try_sleep = 6,
+                   user = params.smokeuser,
+                   conf_dir = params.hadoop_conf_dir,
+                   bin_dir = params.hadoop_bin_dir
+    )
+
+
+if __name__ == "__main__":
+  MahoutServiceCheck().execute()
+
+

+ 8 - 0
ambari-server/src/main/resources/stacks/HDP/2.3/role_command_order.json

@@ -0,0 +1,8 @@
+{
+  "_comment" : "Record format:",
+  "_comment" : "blockedRole-blockedCommand: [blockerRole1-blockerCommand1, blockerRole2-blockerCommand2, ...]",
+  "general_deps" : {
+    "_comment" : "dependencies for all cases",
+    "MAHOUT_SERVICE_CHECK-SERVICE_CHECK": ["NODEMANAGER-START", "RESOURCEMANAGER-START"]
+  }
+}

+ 26 - 0
ambari-server/src/main/resources/stacks/HDP/2.3/services/MAHOUT/metainfo.xml

@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<metainfo>
+    <schemaVersion>2.0</schemaVersion>
+    <services>
+        <service>
+            <name>MAHOUT</name>
+            <extends>common-services/MAHOUT/1.0.0.2.3</extends>
+        </service>
+    </services>
+</metainfo>

+ 43 - 0
ambari-server/src/test/python/stacks/2.3/MAHOUT/test_mahout_client.py

@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from stacks.utils.RMFTestCase import *
+import json
+
+class TestMahoutClient(RMFTestCase):
+  COMMON_SERVICES_PACKAGE_DIR = "MAHOUT/1.0.0.2.3/package"
+  STACK_VERSION = "2.3"
+
+  def test_configure_default(self):
+    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/mahout_client.py",
+                       classname = "MahoutClient",
+                       command = "configure",
+                       config_file="default.json",
+                       hdp_stack_version = self.STACK_VERSION,
+                       target = RMFTestCase.TARGET_COMMON_SERVICES
+    )
+
+    self.assertResourceCalled('Directory', '/etc/mahout/conf',
+                              owner = 'mahout',
+                              group = 'hadoop',
+                              recursive = True,
+                              )
+    self.assertNoMoreResources()
+

+ 86 - 0
ambari-server/src/test/python/stacks/2.3/MAHOUT/test_mahout_service_check.py

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+from stacks.utils.RMFTestCase import *
+
+class TestMahoutClient(RMFTestCase):
+  COMMON_SERVICES_PACKAGE_DIR = "MAHOUT/1.0.0.2.3/package"
+  STACK_VERSION = "2.3"
+
+  def test_configure_default(self):
+    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
+                       classname = "MahoutServiceCheck",
+                       command = "service_check",
+                       config_file="default.json",
+                       hdp_stack_version = self.STACK_VERSION,
+                       target = RMFTestCase.TARGET_COMMON_SERVICES
+    )
+
+    self.assertResourceCalled('ExecuteHadoop', 'fs -rm -r -f /user/ambari-qa/mahoutsmokeoutput /user/ambari-qa/mahoutsmokeinput',
+                              security_enabled = False,
+                              keytab = UnknownConfigurationMock(),
+                              conf_dir = '/etc/hadoop/conf',
+                              try_sleep = 5,
+                              kinit_path_local = '/usr/bin/kinit',
+                              tries = 3,
+                              user = 'ambari-qa',
+                              bin_dir = '/usr/hdp/current/hadoop-client/bin',
+                              principal = UnknownConfigurationMock(),
+                              )
+    self.assertResourceCalled('ExecuteHadoop', 'fs -mkdir /user/ambari-qa/mahoutsmokeinput',
+                              try_sleep = 5,
+                              tries = 3,
+                              bin_dir = '/usr/hdp/current/hadoop-client/bin',
+                              user = 'ambari-qa',
+                              conf_dir = '/etc/hadoop/conf',
+                              )
+    self.assertResourceCalled('File', '/tmp/sample-mahout-test.txt',
+                              content = 'Test text which will be converted to sequence file.',
+                              mode = 0755,
+                              )
+    self.assertResourceCalled('ExecuteHadoop', 'fs -put /tmp/sample-mahout-test.txt /user/ambari-qa/mahoutsmokeinput/',
+                              try_sleep = 5,
+                              tries = 3,
+                              bin_dir = '/usr/hdp/current/hadoop-client/bin',
+                              user = 'ambari-qa',
+                              conf_dir = '/etc/hadoop/conf',
+                              )
+    self.assertResourceCalled('Execute', 'mahout seqdirectory --input /user/ambari-qa/mahoutsmokeinput/'
+                                         'sample-mahout-test.txt --output /user/ambari-qa/mahoutsmokeoutput/ '
+                                         '--charset utf-8',
+                              environment = {'HADOOP_CONF_DIR': '/etc/hadoop/conf',
+                                             'HADOOP_HOME': '/usr/hdp/current/hadoop-client',
+                                             'JAVA_HOME': u'/usr/jdk64/jdk1.7.0_45',
+                                             'MAHOUT_HOME': '/usr/hdp/current/mahout-client'},
+                              path = ['/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'],
+                              tries = 3,
+                              user = 'ambari-qa',
+                              try_sleep = 5,
+                              )
+    self.assertResourceCalled('ExecuteHadoop', 'fs -test -e /user/ambari-qa/mahoutsmokeoutput/_SUCCESS',
+                              try_sleep = 6,
+                              tries = 10,
+                              bin_dir = '/usr/hdp/current/hadoop-client/bin',
+                              user = 'ambari-qa',
+                              conf_dir = '/etc/hadoop/conf',
+                              )
+    self.assertNoMoreResources()
+
+

+ 83 - 0
ambari-server/src/test/python/stacks/2.3/configs/default.json

File diff suppressed because it is too large

Some files were not shown because too many files changed in this diff