소스 검색

AMBARI-4105. Pig on HDP1. Using resource management lib (aonishuk)

Andrew Onischuk 11 년 전
부모
커밋
ed77411d43

+ 38 - 8
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/metainfo.xml

@@ -16,15 +16,45 @@
    limitations under the License.
 -->
 <metainfo>
-    <user>root</user>
-    <comment>Scripting platform for analyzing large datasets</comment>
-    <version>0.11.1.1.3.3.0</version>
-
-    <components>
+  <schemaVersion>2.0</schemaVersion>
+  <services>
+    <service>
+      <name>PIG</name>
+      <comment>Scripting platform for analyzing large datasets</comment>
+      <version>0.11.1.1.3.3.0</version>
+      <components>
         <component>
-            <name>PIG</name>
-            <category>CLIENT</category>
+          <name>PIG</name>
+          <category>CLIENT</category>
+          <commandScript>
+            <script>scripts/pig_client.py</script>
+            <scriptType>PYTHON</scriptType>
+            <timeout>600</timeout>
+          </commandScript>
         </component>
-    </components>
+      </components>
+      <osSpecifics>
+        <osSpecific>
+          <osType>centos6</osType>
+          <packages>
+            <package>
+              <type>rpm</type>
+              <name>pig</name>
+            </package>
+          </packages>
+        </osSpecific>
+      </osSpecifics>
+
+      <commandScript>
+        <script>scripts/service_check.py</script>
+        <scriptType>PYTHON</scriptType>
+        <timeout>300</timeout>
+      </commandScript>
+
+      <configuration-dependencies>
+        <config-type>global</config-type>
+      </configuration-dependencies>
 
+    </service>
+  </services>
 </metainfo>

+ 18 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/files/pigSmoke.sh

@@ -0,0 +1,18 @@
+/*Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License */
+
+A = load 'passwd' using PigStorage(':');
+B = foreach A generate \$0 as id;
+store B into 'pigsmoke.out';

+ 36 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/scripts/params.py

@@ -0,0 +1,36 @@
+#!/usr/bin/env python2.6
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management import *
+
+# server configurations
+config = Script.get_config()
+
+pig_conf_dir = "/etc/pig/conf"
+hadoop_conf_dir = "/etc/hadoop/conf"
+hdfs_user = config['configurations']['global']['hdfs_user']
+smokeuser = config['configurations']['global']['smokeuser']
+user_group = config['configurations']['global']['user_group']
+
+# not supporting 32 bit jdk.
+java64_home = config['configurations']['global']['java64_home']
+hadoop_home = "/usr"

+ 46 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/scripts/pig.py

@@ -0,0 +1,46 @@
+#!/usr/bin/env python2.6
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management import *
+
+def pig():
+  import params
+
+  Directory( params.pig_conf_dir,
+    owner = params.hdfs_user,
+    group = params.user_group
+  )
+
+  pig_TemplateConfig( ['pig-env.sh','pig.properties','log4j.properties'])
+  
+  
+def pig_TemplateConfig(name):
+  import params
+  
+  if not isinstance(name, list):
+    name = [name]
+    
+  for x in name:
+    TemplateConfig( format("{pig_conf_dir}/{x}"),
+        owner = params.hdfs_user
+    )
+  

+ 52 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/scripts/pig_client.py

@@ -0,0 +1,52 @@
+#!/usr/bin/env python2.6
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+import sys
+from resource_management import *
+from pig import pig
+
+         
+class PigClient(Script):
+  def install(self, env):
+    self.install_packages(env)
+    self.configure(env)
+    
+  def configure(self, env):
+    import params
+    env.set_params(params)
+    pig()
+
+  def status(self, env):
+    raise ClientComponentHasNoStatus()
+    
+#for tests
+def main():
+  command_type = 'install'
+  command_data_file = '/root/workspace/Pig/input.json'
+  basedir = '/root/workspace/Pig/main'
+  sys.argv = ["", command_type, command_data_file, basedir]
+  
+  PigClient().execute()
+  
+if __name__ == "__main__":
+  #main()
+  PigClient().execute()

+ 75 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/scripts/service_check.py

@@ -0,0 +1,75 @@
+#!/usr/bin/env python2.6
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management import *
+
+class PigServiceCheck(Script):
+  def service_check(self, env):
+    import params
+    env.set_params(params)
+    
+    input_file = 'passwd'
+    output_file = "pigsmoke.out"
+  
+    cleanup_cmd = format("dfs -rmr {output_file} {input_file}")
+    #cleanup put below to handle retries; if retrying there wil be a stale file that needs cleanup; exit code is fn of second command
+    create_file_cmd = format("{cleanup_cmd}; hadoop dfs -put /etc/passwd {input_file} ") #TODO: inconsistent that second command needs hadoop
+    test_cmd = format("fs -test -e {output_file}")
+  
+    ExecuteHadoop( create_file_cmd,
+      tries     = 3,
+      try_sleep = 5,
+      user      = params.smokeuser,
+      conf_dir = params.hadoop_conf_dir
+    )
+  
+    File( '/tmp/pigSmoke.sh',
+      content = StaticFile("pigSmoke.sh"),
+      mode = 0755
+    )
+  
+    Execute( "pig /tmp/pigSmoke.sh",
+      tries     = 3,
+      try_sleep = 5,
+      path      = '/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
+      user      = params.smokeuser,
+      logoutput = True
+    )
+  
+    ExecuteHadoop( test_cmd,
+      user      = params.smokeuser,
+      conf_dir = params.hadoop_conf_dir
+    )
+    
+def main():
+  import sys
+  command_type = 'service_check'
+  command_data_file = '/root/workspace/Pig/input.json'
+  basedir = '/root/workspace/Pig/main'
+  sys.argv = ["", command_type, command_data_file, basedir]
+  
+  PigServiceCheck().execute()
+  
+if __name__ == "__main__":
+  #main()
+  PigServiceCheck().execute()
+  

+ 30 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/templates/log4j.properties.j2

@@ -0,0 +1,30 @@
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+#
+
+# ***** Set root logger level to DEBUG and its only appender to A.
+log4j.logger.org.apache.pig=info, A
+
+# ***** A is set to be a ConsoleAppender.
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+# ***** A uses PatternLayout.
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n

+ 17 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/templates/pig-env.sh.j2

@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+JAVA_HOME={{java64_home}}
+HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}}

+ 55 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG/package/templates/pig.properties.j2

@@ -0,0 +1,55 @@
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+#
+
+# Pig configuration file. All values can be overwritten by command line arguments.
+
+# log4jconf log4j configuration file
+# log4jconf=./conf/log4j.properties
+
+# a file that contains pig script
+#file=
+
+# load jarfile, colon separated
+#jar=
+
+#verbose print all log messages to screen (default to print only INFO and above to screen)
+#verbose=true
+
+#exectype local|mapreduce, mapreduce is default
+#exectype=local
+
+#pig.logfile=
+
+#Do not spill temp files smaller than this size (bytes)
+#pig.spill.size.threshold=5000000
+#EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes)
+#This should help reduce the number of files being spilled.
+#pig.spill.gc.activation.size=40000000
+
+#the following two parameters are to help estimate the reducer number
+#pig.exec.reducers.bytes.per.reducer=1000000000
+#pig.exec.reducers.max=999
+
+#Use this option only when your Pig job will otherwise die because of
+#using more counter than hadoop configured limit
+#pig.disable.counter=true
+hcat.bin=/usr/bin/hcat

+ 52 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG2/configuration/pig.properties

@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Pig default configuration file. All values can be overwritten by pig.properties and command line arguments.
+# see bin/pig -help
+
+# brief logging (no timestamps)
+brief=false
+
+#debug level, INFO is default
+debug=INFO
+
+#verbose print all log messages to screen (default to print only INFO and above to screen)
+verbose=false
+
+#exectype local|mapreduce, mapreduce is default
+exectype=mapreduce
+
+#Enable insertion of information about script into hadoop job conf 
+pig.script.info.enabled=true
+
+#Do not spill temp files smaller than this size (bytes)
+pig.spill.size.threshold=5000000
+#EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes)
+#This should help reduce the number of files being spilled.
+pig.spill.gc.activation.size=40000000
+
+#the following two parameters are to help estimate the reducer number
+pig.exec.reducers.bytes.per.reducer=1000000000
+pig.exec.reducers.max=999
+
+#Temporary location to store the intermediate data.
+pig.temp.dir=/tmp/
+
+#Threshold for merging FRJoin fragment files
+pig.files.concatenation.threshold=100
+pig.optimistic.files.concatenation=false;
+
+pig.disable.counter=false

+ 30 - 0
ambari-server/src/main/resources/stacks/HDP/1.3._/services/PIG2/metainfo.xml

@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<metainfo>
+    <user>root</user>
+    <comment>Scripting platform for analyzing large datasets</comment>
+    <version>0.11.1.1.3.3.0</version>
+
+    <components>
+        <component>
+            <name>PIG</name>
+            <category>CLIENT</category>
+        </component>
+    </components>
+
+</metainfo>