|
@@ -27,18 +27,20 @@ import traceback
|
|
|
import re
|
|
|
import socket
|
|
|
|
|
|
-cluster_name="perf-cluster"
|
|
|
-ambari_repo_file_url="http://s3.amazonaws.com/dev.hortonworks.com/ambari/centos6/2.x/latest/trunk/ambaribn.repo"
|
|
|
+cluster_prefix = "perf"
|
|
|
+ambari_repo_file_url = "http://s3.amazonaws.com/dev.hortonworks.com/ambari/centos6/2.x/latest/trunk/ambaribn.repo"
|
|
|
|
|
|
-public_hostname_script="foo"
|
|
|
-hostname_script="foo"
|
|
|
+public_hostname_script = "foo"
|
|
|
+hostname_script = "foo"
|
|
|
|
|
|
-start_number=1
|
|
|
-number_of_agents_on_host=50
|
|
|
+NUMBER_OF_AGENTS_ON_HOST = 50
|
|
|
|
|
|
|
|
|
class SSH:
|
|
|
- """ Ssh implementation of this """
|
|
|
+ """
|
|
|
+ Ssh implementation of this
|
|
|
+ """
|
|
|
+
|
|
|
def __init__(self, user, sshkey_file, host, command, custom_option='', errorMessage = None):
|
|
|
self.user = user
|
|
|
self.sshkey_file = sshkey_file
|
|
@@ -46,8 +48,6 @@ class SSH:
|
|
|
self.command = command
|
|
|
self.errorMessage = errorMessage
|
|
|
self.custom_option = custom_option
|
|
|
- pass
|
|
|
-
|
|
|
|
|
|
def run(self):
|
|
|
sshcommand = ["ssh",
|
|
@@ -91,12 +91,15 @@ class SSH:
|
|
|
|
|
|
print "SSH command execution finished"
|
|
|
|
|
|
- return {"exitstatus": sshstat.returncode, "log": log, "errormsg": errorMsg}
|
|
|
+ return {"exitstatus": sshstat.returncode, "log": log, "errormsg": errorMsg}
|
|
|
|
|
|
|
|
|
class SCP:
|
|
|
- """ SCP implementation that is thread based. The status can be returned using
|
|
|
- status val """
|
|
|
+ """
|
|
|
+ SCP implementation that is thread based. The status can be returned using
|
|
|
+ status val
|
|
|
+ """
|
|
|
+
|
|
|
def __init__(self, user, sshkey_file, host, inputFile, remote, errorMessage = None):
|
|
|
self.user = user
|
|
|
self.sshkey_file = sshkey_file
|
|
@@ -104,8 +107,6 @@ class SCP:
|
|
|
self.inputFile = inputFile
|
|
|
self.remote = remote
|
|
|
self.errorMessage = errorMessage
|
|
|
- pass
|
|
|
-
|
|
|
|
|
|
def run(self):
|
|
|
scpcommand = ["scp",
|
|
@@ -115,7 +116,6 @@ class SCP:
|
|
|
"-o", "StrictHostKeyChecking=no",
|
|
|
"-i", self.sshkey_file, self.inputFile, self.user + "@" +
|
|
|
self.host + ":" + self.remote]
|
|
|
-
|
|
|
i = 1
|
|
|
while True:
|
|
|
try:
|
|
@@ -162,27 +162,33 @@ def main():
|
|
|
parser.add_argument('--key', type=str,
|
|
|
action='store', help='Path to GCE ssh key.')
|
|
|
|
|
|
- parser.add_argument('--cluster-prefix', type=str,
|
|
|
- action='store', help='Cluster name prefix.')
|
|
|
+ parser.add_argument('--cluster-suffix', type=str,
|
|
|
+ action='store', help='Cluster name suffix.')
|
|
|
|
|
|
parser.add_argument('--agent-prefix', type=str,
|
|
|
action='store', help='Agent name prefix.')
|
|
|
|
|
|
parser.add_argument('--agents-count', type=int,
|
|
|
- action='store', help='Agents count for whole cluster(multiples of 50).')
|
|
|
+ action='store', help='Agents count for whole cluster (multiples of 50).')
|
|
|
|
|
|
if len(sys.argv) <= 1:
|
|
|
parser.print_help()
|
|
|
- else:
|
|
|
- args = parser.parse_args()
|
|
|
- do_work(args)
|
|
|
+ sys.exit(-1)
|
|
|
+
|
|
|
+ args = parser.parse_args()
|
|
|
+ do_work(args)
|
|
|
|
|
|
-# base method which process cluster deployment
|
|
|
def deploy_cluster(args):
|
|
|
- number_of_nodes=args.agents_count/number_of_agents_on_host
|
|
|
+ """
|
|
|
+ Process cluster deployment
|
|
|
+ :param args: Command line args.
|
|
|
+ """
|
|
|
+ # When dividing, need to get the ceil.
|
|
|
+ number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1
|
|
|
+
|
|
|
# trying to create cluster with needed params
|
|
|
- print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(args.cluster_prefix, cluster_name, str(number_of_nodes))
|
|
|
- execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(args.cluster_prefix, cluster_name, str(number_of_nodes)),
|
|
|
+ print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes))
|
|
|
+ execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)),
|
|
|
"Failed to create cluster, probably not enough resources!", "-tt")
|
|
|
|
|
|
# VMs are not accessible immediately
|
|
@@ -190,19 +196,20 @@ def deploy_cluster(args):
|
|
|
|
|
|
# getting list of vms information like hostname and ip address
|
|
|
print "Getting list of virtual machines from cluster..."
|
|
|
+ # Dictionary from host name to IP
|
|
|
vms = get_vms_list(args)
|
|
|
|
|
|
# check number of nodes in cluster to be the same as user asked
|
|
|
print "Checking count of created nodes in cluster..."
|
|
|
if not vms or len(vms) < number_of_nodes:
|
|
|
- raise Exception("Can not bring up enough nodes. Requested {0}, but got: {1}. Probably not enough resources!".format(number_of_nodes, len(vms)))
|
|
|
+ raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(vms)))
|
|
|
|
|
|
print "GCE cluster was successfully created!"
|
|
|
pretty_print_vms(vms)
|
|
|
|
|
|
# installing/starting ambari-server and ambari-agents on each host
|
|
|
server_host_name = sorted(vms.items())[0][0]
|
|
|
- server_installed=False
|
|
|
+ server_installed = False
|
|
|
|
|
|
print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server and ambari-agent)..."
|
|
|
create_server_script(args, server_host_name)
|
|
@@ -211,14 +218,33 @@ def deploy_cluster(args):
|
|
|
create_agent_script(args, server_host_name)
|
|
|
|
|
|
time.sleep(10)
|
|
|
+
|
|
|
+ # If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead
|
|
|
+ # of 50 on every VM.
|
|
|
+ num_agents_left_to_create = args.agents_count
|
|
|
+
|
|
|
+ start_num = 1
|
|
|
for (hostname, ip) in sorted(vms.items()):
|
|
|
+ num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST)
|
|
|
+
|
|
|
print "=========================="
|
|
|
- print "Working on {0}".format(hostname)
|
|
|
+ print "Working on VM {0} that will contain hosts %d - %d".format(hostname, start_num, start_num + num_agents_on_this_host - 1)
|
|
|
+
|
|
|
+ # The agent multiplier config will be different on each VM.
|
|
|
+
|
|
|
+ cmd_generate_multiplier_conf = "mkdir -p /etc/ambari-agent/conf/ ; printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf".format(start_num, num_agents_on_this_host, args.agent_prefix)
|
|
|
+ start_num += num_agents_on_this_host
|
|
|
+ num_agents_left_to_create -= num_agents_on_this_host
|
|
|
+
|
|
|
if not server_installed:
|
|
|
remote_path = "/server.sh"
|
|
|
local_path = "server.sh"
|
|
|
print "Copying server.sh to {0}...".format(hostname)
|
|
|
put_file(args, ip, local_path, remote_path, "Failed to copy file!")
|
|
|
+
|
|
|
+ print "Generating agent-multiplier.conf"
|
|
|
+ execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
|
|
|
+
|
|
|
print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname)
|
|
|
execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &",
|
|
|
"Install/configure/start server script failed!")
|
|
@@ -228,6 +254,10 @@ def deploy_cluster(args):
|
|
|
local_path = "agent.sh"
|
|
|
print "Copying agent.sh to {0}...".format(hostname)
|
|
|
put_file(args, ip, local_path, remote_path, "Failed to copy file!")
|
|
|
+
|
|
|
+ print "Generating agent-multiplier.conf"
|
|
|
+ execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
|
|
|
+
|
|
|
print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname)
|
|
|
execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &",
|
|
|
"Install/configure start agent script failed!")
|
|
@@ -235,75 +265,90 @@ def deploy_cluster(args):
|
|
|
print "All scripts where successfully copied and started on all hosts. " \
|
|
|
"\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!"
|
|
|
|
|
|
-# check if all required params were passed by user
|
|
|
-# if all needed params available then start cluster deploy
|
|
|
+
|
|
|
def do_work(args):
|
|
|
+ """
|
|
|
+ Check that all required args are passed in. If so, deploy the cluster.
|
|
|
+ :param args: Command line args
|
|
|
+ """
|
|
|
if not args.controller:
|
|
|
- raise Failure("GCE controller ip address not defined!")
|
|
|
+ raise Exception("GCE controller ip address is not defined!")
|
|
|
|
|
|
if not args.key:
|
|
|
- raise Failure("Path to gce ssh key not defined!")
|
|
|
+ raise Exception("Path to gce ssh key is not defined!")
|
|
|
|
|
|
- if not args.cluster_prefix:
|
|
|
- raise Failure("Cluster name prefix not defined!")
|
|
|
+ if not args.cluster_suffix:
|
|
|
+ raise Exception("Cluster name suffix is not defined!")
|
|
|
|
|
|
if not args.agent_prefix:
|
|
|
- raise Failure("Agent name prefix not defined!")
|
|
|
+ raise Exception("Agent name prefix is not defined!")
|
|
|
|
|
|
if not args.agents_count:
|
|
|
- raise Failure("Agents count for whole cluster(multiples of 50) not defined!")
|
|
|
+ raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!")
|
|
|
|
|
|
deploy_cluster(args)
|
|
|
|
|
|
-# creating server.sh script in the same dir where current script is located
|
|
|
-# server.sh script will install, configure and start ambari-server and ambari-agent on host
|
|
|
-def create_server_script(args, server_host_name):
|
|
|
- file = open("server.sh", "w")
|
|
|
-
|
|
|
- file.write("#!/bin/bash\n")
|
|
|
-
|
|
|
- file.write("wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url))
|
|
|
- file.write("yum clean all; yum install git ambari-server ambari-agent -y\n")
|
|
|
- file.write("cd /home; git clone https://github.com/apache/ambari.git\n")
|
|
|
-
|
|
|
- file.write("cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n")
|
|
|
- file.write("cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n")
|
|
|
-
|
|
|
- file.write("ambari-server setup -s\n")
|
|
|
- file.write("sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n")
|
|
|
- file.write("ambari-server start --skip-database-check\n")
|
|
|
|
|
|
- file.write("sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name))
|
|
|
- file.write("sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script))
|
|
|
- file.write("printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf\n".format(start_number, number_of_agents_on_host, args.agent_prefix))
|
|
|
- file.write("python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n")
|
|
|
-
|
|
|
- file.write("exit 0")
|
|
|
+def create_server_script(args, server_host_name):
|
|
|
+ """
|
|
|
+ Creating server.sh script in the same dir where current script is located
|
|
|
+ server.sh script will install, configure and start ambari-server and ambari-agent on host
|
|
|
+ :param args: Command line args
|
|
|
+ :param server_host_name: Server host name
|
|
|
+ """
|
|
|
+
|
|
|
+ contents = "#!/bin/bash\n" + \
|
|
|
+ "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
|
|
|
+ "yum clean all; yum install git ambari-server ambari-agent -y\n" + \
|
|
|
+ "cd /home; git clone https://github.com/apache/ambari.git\n" + \
|
|
|
+ "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \
|
|
|
+ "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
|
|
|
+ "ambari-server setup -s\n" + \
|
|
|
+ "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \
|
|
|
+ "ambari-server start --skip-database-check\n" + \
|
|
|
+ "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
|
|
|
+ "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
|
|
|
+ "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \
|
|
|
+ "exit 0"
|
|
|
+
|
|
|
+ with open("server.sh", "w") as f:
|
|
|
+ f.write(contents)
|
|
|
|
|
|
- file.close()
|
|
|
|
|
|
-# creating agent.sh script in the same dir where current script is located
|
|
|
-# agent.sh script will install, configure and start ambari-agent on host
|
|
|
def create_agent_script(args, server_host_name):
|
|
|
- file = open("agent.sh", "w")
|
|
|
-
|
|
|
- file.write("#!/bin/bash\n")
|
|
|
-
|
|
|
- file.write("wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url))
|
|
|
- file.write("yum clean all; yum install git ambari-agent -y\n")
|
|
|
- file.write("cd /home; git clone https://github.com/apache/ambari.git\n")
|
|
|
- file.write("cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n")
|
|
|
+ """
|
|
|
+ Creating agent.sh script in the same dir where current script is located
|
|
|
+ agent.sh script will install, configure and start ambari-agent on host
|
|
|
+ :param args: Command line args
|
|
|
+ :param server_host_name: Server host name
|
|
|
+ """
|
|
|
+
|
|
|
+ # TODO, instead of cloning Ambari repo on each VM, do it on the server once and distribute to all of the agents.
|
|
|
+ contents = "#!/bin/bash\n" + \
|
|
|
+ "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
|
|
|
+ "yum clean all; yum install git ambari-agent -y\n" + \
|
|
|
+ "cd /home; git clone https://github.com/apache/ambari.git\n" + \
|
|
|
+ "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
|
|
|
+ "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
|
|
|
+ "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
|
|
|
+ "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \
|
|
|
+ "exit 0"
|
|
|
+
|
|
|
+ with open("agent.sh", "w") as f:
|
|
|
+ f.write(contents)
|
|
|
|
|
|
- file.write("sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name))
|
|
|
- file.write("sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script))
|
|
|
- file.write("printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf\n".format(start_number, number_of_agents_on_host, args.agent_prefix))
|
|
|
- file.write("python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n")
|
|
|
- file.write("exit 0")
|
|
|
|
|
|
- file.close()
|
|
|
-
|
|
|
-# method to execute ssh commands via SSH class
|
|
|
def execute_command(args, ip, cmd, fail_message, custom_option='', login='root'):
|
|
|
+ """
|
|
|
+ Method to execute ssh commands via SSH class
|
|
|
+ :param args: Command line args
|
|
|
+ :param ip: IP to ssh to
|
|
|
+ :param cmd: Command to execute
|
|
|
+ :param fail_message: In case of an error, what to report
|
|
|
+ :param custom_option: Custom flags
|
|
|
+ :param login: Login user
|
|
|
+ :return: Return execute log message
|
|
|
+ """
|
|
|
ssh = SSH(login, args.key, ip, cmd, custom_option, fail_message)
|
|
|
ssh_result = ssh.run()
|
|
|
status_code = ssh_result["exitstatus"]
|
|
@@ -312,8 +357,18 @@ def execute_command(args, ip, cmd, fail_message, custom_option='', login='root')
|
|
|
|
|
|
return ssh_result["log"][0]
|
|
|
|
|
|
-# method to copy file from local to remote host via SCP class
|
|
|
+
|
|
|
def put_file(args, ip, local_file, remote_file, fail_message, login='root'):
|
|
|
+ """
|
|
|
+ Method to copy file from local to remote host via SCP class
|
|
|
+ :param args: Command line args
|
|
|
+ :param ip: IP to ssh to
|
|
|
+ :param local_file: Path to local file
|
|
|
+ :param remote_file: Path to remote file
|
|
|
+ :param fail_message: In case of an error, what to report
|
|
|
+ :param login: Login user.
|
|
|
+ :return: Return copy log message
|
|
|
+ """
|
|
|
scp = SCP(login, args.key, ip, local_file,
|
|
|
remote_file, fail_message)
|
|
|
scp_result = scp.run()
|
|
@@ -323,10 +378,15 @@ def put_file(args, ip, local_file, remote_file, fail_message, login='root'):
|
|
|
|
|
|
return scp_result["log"][0]
|
|
|
|
|
|
-# method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster
|
|
|
+
|
|
|
def get_vms_list(args):
|
|
|
- gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format(
|
|
|
- args.cluster_prefix, cluster_name)
|
|
|
+ """
|
|
|
+ Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster
|
|
|
+ :param args: Command line args
|
|
|
+ :return: Mapping of VM host name to ip.
|
|
|
+ """
|
|
|
+
|
|
|
+ gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format(cluster_prefix, args.cluster_suffix)
|
|
|
out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt")
|
|
|
lines = out.split('\n')
|
|
|
#print "LINES=" + str(lines)
|
|
@@ -339,15 +399,15 @@ def get_vms_list(args):
|
|
|
if match:
|
|
|
result[match.group(2)] = match.group(1)
|
|
|
else:
|
|
|
- raise Exception('Can not parse "{0}"'.format(s))
|
|
|
+ raise Exception('Cannot parse "{0}"'.format(s))
|
|
|
return result
|
|
|
else:
|
|
|
- raise Exception('Can not parse "{0}"'.format(lines))
|
|
|
+ raise Exception('Cannot parse "{0}"'.format(lines))
|
|
|
|
|
|
|
|
|
def pretty_print_vms(vms):
|
|
|
print "----------------------------"
|
|
|
- print "server ip: {0}".format(sorted(vms.items())[0][1])
|
|
|
+ print "Server IP: {0}".format(sorted(vms.items())[0][1])
|
|
|
print "Hostnames of nodes in cluster:"
|
|
|
for (hostname, ip) in sorted(vms.items()):
|
|
|
print hostname
|