فهرست منبع

AMBARI-19003. Perf: Fix deploy-gce-perf-cluster.py to deploy separate server onto own cluster with different settings for more cores and MySQL DB (alejandro)

Alejandro Fernandez 8 سال پیش
والد
کامیت
052da577bc
1 فایلهای تغییر یافته به همراه 169 افزوده شده و 74 حذف شده
  1. 169 74
      contrib/utils/perf/deploy-gce-perf-cluster.py

+ 169 - 74
contrib/utils/perf/deploy-gce-perf-cluster.py

@@ -148,6 +148,7 @@ class SCP:
 
 
     return {"exitstatus": scpstat.returncode, "log": log, "errormsg": errorMsg}
     return {"exitstatus": scpstat.returncode, "log": log, "errormsg": errorMsg}
 
 
+
 # main method to parse arguments from user and start work
 # main method to parse arguments from user and start work
 def main():
 def main():
   parser = argparse.ArgumentParser(
   parser = argparse.ArgumentParser(
@@ -178,6 +179,30 @@ def main():
   args = parser.parse_args()
   args = parser.parse_args()
   do_work(args)
   do_work(args)
 
 
+
+def do_work(args):
+  """
+  Check that all required args are passed in. If so, deploy the cluster.
+  :param args: Command line args
+  """
+  if not args.controller:
+    raise Exception("GCE controller ip address is not defined!")
+
+  if not args.key:
+    raise Exception("Path to gce ssh key is not defined!")
+
+  if not args.cluster_suffix:
+    raise Exception("Cluster name suffix is not defined!")
+
+  if not args.agent_prefix:
+    raise Exception("Agent name prefix is not defined!")
+
+  if not args.agents_count:
+    raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!")
+
+  deploy_cluster(args)
+
+
 def deploy_cluster(args):
 def deploy_cluster(args):
   """
   """
   Process cluster deployment
   Process cluster deployment
@@ -186,49 +211,54 @@ def deploy_cluster(args):
   # When dividing, need to get the ceil.
   # When dividing, need to get the ceil.
   number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1
   number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1
 
 
-  # trying to create cluster with needed params
-  print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes))
-  execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)),
-                  "Failed to create cluster, probably not enough resources!", "-tt")
-
-  # VMs are not accessible immediately
-  time.sleep(10)
+  # In case of an error after creating VMs, can simply comment out this function to run again without creating VMs.
+  create_vms(args, number_of_nodes)
 
 
   # getting list of vms information like hostname and ip address
   # getting list of vms information like hostname and ip address
   print "Getting list of virtual machines from cluster..."
   print "Getting list of virtual machines from cluster..."
   # Dictionary from host name to IP
   # Dictionary from host name to IP
-  vms = get_vms_list(args)
+  (server_dict, agents_dict) = get_vms_list(args)
 
 
   # check number of nodes in cluster to be the same as user asked
   # check number of nodes in cluster to be the same as user asked
   print "Checking count of created nodes in cluster..."
   print "Checking count of created nodes in cluster..."
-  if not vms or len(vms) < number_of_nodes:
-    raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(vms)))
+  if not agents_dict or len(agents_dict) < number_of_nodes:
+    raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(agents_dict)))
 
 
-  print "GCE cluster was successfully created!"
-  pretty_print_vms(vms)
+  print "GCE cluster was successfully created!\n"
 
 
   # installing/starting ambari-server and ambari-agents on each host
   # installing/starting ambari-server and ambari-agents on each host
-  server_host_name = sorted(vms.items())[0][0]
-  server_installed = False
+  server_item = server_dict.items()[0]
+  server_host_name = server_item[0]
+  server_ip = server_item[1]
+  print "=========================="
+  print "Server Hostname: %s" % server_host_name
+  print "Server IP: %s" % server_ip
+  print "==========================\n"
+
+  # Sort the agents by hostname into a list.
+  sorted_agents = sort_hosts(agents_dict)
+  pretty_print_vms(sorted_agents)
 
 
-  print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server and ambari-agent)..."
-  create_server_script(args, server_host_name)
+  print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server)..."
+  create_server_script(server_host_name)
 
 
   print "Creating agent.sh script (which will be executed on agent hosts to install/configure/start ambari-agent..."
   print "Creating agent.sh script (which will be executed on agent hosts to install/configure/start ambari-agent..."
-  create_agent_script(args, server_host_name)
+  create_agent_script(server_host_name)
 
 
   time.sleep(10)
   time.sleep(10)
 
 
+  prepare_server(args, server_host_name, server_ip)
+
   # If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead
   # If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead
   # of 50 on every VM.
   # of 50 on every VM.
   num_agents_left_to_create = args.agents_count
   num_agents_left_to_create = args.agents_count
-
   start_num = 1
   start_num = 1
-  for (hostname, ip) in sorted(vms.items()):
+
+  for (hostname, ip) in sorted_agents:
     num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST)
     num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST)
 
 
     print "=========================="
     print "=========================="
-    print "Working on VM {0} that will contain hosts %d - %d".format(hostname, start_num, start_num + num_agents_on_this_host - 1)
+    print "Working on VM {0} that will contain hosts {1} - {2}".format(hostname, start_num, start_num + num_agents_on_this_host - 1)
 
 
     # The agent multiplier config will be different on each VM.
     # The agent multiplier config will be different on each VM.
 
 
@@ -236,90 +266,117 @@ def deploy_cluster(args):
     start_num += num_agents_on_this_host
     start_num += num_agents_on_this_host
     num_agents_left_to_create -= num_agents_on_this_host
     num_agents_left_to_create -= num_agents_on_this_host
 
 
-    if not server_installed:
-      remote_path = "/server.sh"
-      local_path = "server.sh"
-      print "Copying server.sh to {0}...".format(hostname)
-      put_file(args, ip, local_path, remote_path, "Failed to copy file!")
-
-      print "Generating agent-multiplier.conf"
-      execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
-
-      print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname)
-      execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &",
-                    "Install/configure/start server script failed!")
-      server_installed = True
-    else:
-      remote_path = "/agent.sh"
-      local_path = "agent.sh"
-      print "Copying agent.sh to {0}...".format(hostname)
-      put_file(args, ip, local_path, remote_path, "Failed to copy file!")
-
-      print "Generating agent-multiplier.conf"
-      execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
-
-      print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname)
-      execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &",
-                    "Install/configure start agent script failed!")
+    prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf)
 
 
+  pass
   print "All scripts where successfully copied and started on all hosts. " \
   print "All scripts where successfully copied and started on all hosts. " \
         "\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!"
         "\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!"
 
 
 
 
-def do_work(args):
+def create_vms(args, number_of_nodes):
   """
   """
-  Check that all required args are passed in. If so, deploy the cluster.
+  Request the server and VMs for the agents from GCE.
   :param args: Command line args
   :param args: Command line args
+  :param number_of_nodes: Number of VMs to request.
   """
   """
-  if not args.controller:
-    raise Exception("GCE controller ip address is not defined!")
+  print "Creating server VM {0}-server-{1} with xxlarge nodes on centos6...".format(cluster_prefix, args.cluster_suffix)
+  execute_command(args, args.controller, "/usr/sbin/gce up {0}-server-{1} 1 --centos6 --xxlarge".format(cluster_prefix, args.cluster_suffix),
+                  "Failed to create server, probably not enough resources!", "-tt")
+  time.sleep(10)
 
 
-  if not args.key:
-    raise Exception("Path to gce ssh key is not defined!")
+  # trying to create cluster with needed params
+  print "Creating agent VMs {0}-agent-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes))
+  execute_command(args, args.controller, "/usr/sbin/gce up {0}-agent-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)),
+                  "Failed to create cluster VMs, probably not enough resources!", "-tt")
 
 
-  if not args.cluster_suffix:
-    raise Exception("Cluster name suffix is not defined!")
+  # VMs are not accessible immediately
+  time.sleep(10)
 
 
-  if not args.agent_prefix:
-    raise Exception("Agent name prefix is not defined!")
 
 
-  if not args.agents_count:
-    raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!")
+def prepare_server(args, hostname, ip):
+  remote_path = "/server.sh"
+  local_path = "server.sh"
+  print "Copying server.sh to {0}...".format(hostname)
+  put_file(args, ip, local_path, remote_path, "Failed to copy file!")
 
 
-  deploy_cluster(args)
+  print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname)
+  execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &",
+                  "Install/configure/start server script failed!")
 
 
 
 
-def create_server_script(args, server_host_name):
+def prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf):
+  remote_path = "/agent.sh"
+  local_path = "agent.sh"
+  print "Copying agent.sh to {0}...".format(hostname)
+  put_file(args, ip, local_path, remote_path, "Failed to copy file!")
+
+  print "Generating agent-multiplier.conf"
+  execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
+
+  print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname)
+  execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &",
+                  "Install/configure start agent script failed!")
+
+
+def create_server_script(server_host_name):
   """
   """
   Creating server.sh script in the same dir where current script is located
   Creating server.sh script in the same dir where current script is located
   server.sh script will install, configure and start ambari-server and ambari-agent on host
   server.sh script will install, configure and start ambari-server and ambari-agent on host
-  :param args: Command line args
   :param server_host_name: Server host name
   :param server_host_name: Server host name
   """
   """
 
 
+  # ambari-server setup <options> may not work properly, so doing several calls like
+  # echo "arg=value" >> .../ambari.properties
+
   contents = "#!/bin/bash\n" + \
   contents = "#!/bin/bash\n" + \
   "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
   "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
-  "yum clean all; yum install git ambari-server ambari-agent -y\n" + \
-  "cd /home; git clone https://github.com/apache/ambari.git\n" + \
+  "yum clean all; yum install git ambari-server -y\n" + \
+  "mkdir /home ; cd /home ; git clone https://github.com/apache/ambari.git\n" + \
   "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \
   "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \
   "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
   "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
+  "\n" + \
+  "\n" + \
+  "yum install mysql-connector-java* -y\n" + \
+  "yum install mysql-server -y\n" + \
+  "service mysqld start\n" + \
+  "mysql -uroot -e \"CREATE DATABASE ambari;\"\n" + \
+  "mysql -uroot -e \"SOURCE /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql;\" ambari\n" + \
+  "mysql -uroot -e \"CREATE USER 'ambari'@'%' IDENTIFIED BY 'bigdata';\"\n" + \
+  "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'%%';\"\n" + \
+  "mysql -uroot -e \"CREATE USER 'ambari'@'localhost' IDENTIFIED BY 'bigdata';\"\n" + \
+  "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'localhost';\"\n" + \
+  "mysql -uroot -e \"CREATE USER 'ambari'@'{0}' IDENTIFIED BY 'bigdata';\"\n".format(server_host_name) + \
+  "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'{0}';\"\n".format(server_host_name) + \
+  "mysql -uroot -e \"FLUSH PRIVILEGES;\"\n" + \
+  "\n" + \
+  "\n" + \
   "ambari-server setup -s\n" + \
   "ambari-server setup -s\n" + \
+  "ambari-server setup --database mysql --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar --databasehost=localhost --databaseport=3306 --databasename=ambari --databaseusername=ambari --databasepassword=bigdata\n" + \
+  "sed -i -e 's/=postgres/=mysql/g' /etc/ambari-server/conf/ambari.properties\n" + \
+  "sed -i -e 's/server.persistence.type=local/server.persistence.type=remote/g' /etc/ambari-server/conf/ambari.properties\n" + \
+  "sed -i -e 's/local.database.user=postgres//g' /etc/ambari-server/conf/ambari.properties\n" + \
+  "sed -i -e 's/server.jdbc.postgres.schema=ambari//g' /etc/ambari-server/conf/ambari.properties\n" + \
   "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \
   "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \
+  "\n" + \
+  "echo 'server.jdbc.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \
+  "echo 'server.jdbc.rca.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \
+  "echo 'server.jdbc.rca.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \
+  "echo 'server.jdbc.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \
+  "echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties\n" + \
+  "echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties\n" + \
+  "echo 'server.jdbc.driver.path=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties\n" + \
+  "\n" + \
   "ambari-server start --skip-database-check\n" + \
   "ambari-server start --skip-database-check\n" + \
-  "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
-  "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
-  "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \
   "exit 0"
   "exit 0"
 
 
   with open("server.sh", "w") as f:
   with open("server.sh", "w") as f:
     f.write(contents)
     f.write(contents)
 
 
 
 
-def create_agent_script(args, server_host_name):
+def create_agent_script(server_host_name):
   """
   """
   Creating agent.sh script in the same dir where current script is located
   Creating agent.sh script in the same dir where current script is located
   agent.sh script will install, configure and start ambari-agent on host
   agent.sh script will install, configure and start ambari-agent on host
-  :param args: Command line args
   :param server_host_name: Server host name
   :param server_host_name: Server host name
   """
   """
 
 
@@ -327,7 +384,7 @@ def create_agent_script(args, server_host_name):
   contents = "#!/bin/bash\n" + \
   contents = "#!/bin/bash\n" + \
   "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
   "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
   "yum clean all; yum install git ambari-agent -y\n" + \
   "yum clean all; yum install git ambari-agent -y\n" + \
-  "cd /home; git clone https://github.com/apache/ambari.git\n" + \
+  "mkdir /home ; cd /home; git clone https://github.com/apache/ambari.git\n" + \
   "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
   "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
   "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
   "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
   "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
   "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
@@ -380,13 +437,28 @@ def put_file(args, ip, local_file, remote_file, fail_message, login='root'):
 
 
 
 
 def get_vms_list(args):
 def get_vms_list(args):
+  """
+  Get tuple of (x, y) where 
+  x = dictionary from single server host name to ip
+  y = dictionary from multiple agent host names to ip
+  :param args: Command line arguments
+  :return: Tuple of dictionaries of hostnames and ip for server and agents.
+  """
+  # Get the server.
+  server = __get_vms_list_from_name(args, "{0}-server-{1}".format(cluster_prefix, args.cluster_suffix))
+
+  # Get the agents
+  agents = __get_vms_list_from_name(args, "{0}-agent-{1}".format(cluster_prefix, args.cluster_suffix))
+
+  return (server, agents)
+
+def __get_vms_list_from_name(args, cluster_name):
   """
   """
   Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster
   Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster
   :param args: Command line args
   :param args: Command line args
   :return: Mapping of VM host name to ip.
   :return: Mapping of VM host name to ip.
   """
   """
-
-  gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format(cluster_prefix, args.cluster_suffix)
+  gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}'.format(cluster_name)
   out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt")
   out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt")
   lines = out.split('\n')
   lines = out.split('\n')
   #print "LINES=" + str(lines)
   #print "LINES=" + str(lines)
@@ -405,13 +477,36 @@ def get_vms_list(args):
     raise Exception('Cannot parse "{0}"'.format(lines))
     raise Exception('Cannot parse "{0}"'.format(lines))
 
 
 
 
+def sort_hosts(hosts):
+  """
+  Sort the hosts by name and take into account the numbers.
+  :param hosts: Dictionary from host name (e.g., perf-9-test, perf-62-test), to the IP
+  :return: Sorted list of tuples
+  """
+  host_names = hosts.keys()
+  sorted_host_tuples = [(None, None),] * len(hosts)
+
+  pattern = re.compile(".*?-agent-.*?(\d+)")
+  for host_name in host_names:
+    m = pattern.match(host_name)
+    if m and len(m.groups()) == 1:
+      number = int(m.group(1))
+      ip = hosts[host_name]
+      sorted_host_tuples[number - 1] = (host_name, ip)
+
+  return sorted_host_tuples
+
+
 def pretty_print_vms(vms):
 def pretty_print_vms(vms):
-  print "----------------------------"
-  print "Server IP: {0}".format(sorted(vms.items())[0][1])
+  """
+  Pretty print the VMs hostnames
+  :param vms: List of tuples (hostname, ip)
+  """
+  print "=========================="
   print "Hostnames of nodes in cluster:"
   print "Hostnames of nodes in cluster:"
-  for (hostname, ip) in sorted(vms.items()):
+  for (hostname, ip) in vms:
     print hostname
     print hostname
-  print "----------------------------"
+  print "==========================\n"
 
 
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":