deploy-gce-perf-cluster.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. #!/usr/bin/env python
  2. """
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. """
  17. import argparse
  18. import os
  19. import subprocess
  20. import sys
  21. import pprint
  22. import time
  23. import traceback
  24. import re
  25. import socket
  26. cluster_prefix = "perf"
  27. ambari_repo_file_url = "http://s3.amazonaws.com/dev.hortonworks.com/ambari/centos6/2.x/updates/2.5.0.0/ambaribn.repo"
  28. public_hostname_script = "foo"
  29. hostname_script = "foo"
  30. NUMBER_OF_AGENTS_ON_HOST = 50
  31. class SSH:
  32. """
  33. Ssh implementation of this
  34. """
  35. def __init__(self, user, sshkey_file, host, command, custom_option='', errorMessage = None):
  36. self.user = user
  37. self.sshkey_file = sshkey_file
  38. self.host = host
  39. self.command = command
  40. self.errorMessage = errorMessage
  41. self.custom_option = custom_option
  42. def run(self):
  43. sshcommand = ["ssh",
  44. "-o", "ConnectTimeOut=180",
  45. "-o", "StrictHostKeyChecking=no",
  46. "-o", "BatchMode=yes",
  47. self.custom_option,
  48. "-i", self.sshkey_file,
  49. self.user + "@" + self.host, self.command]
  50. if not self.custom_option:
  51. del sshcommand[7]
  52. i = 1
  53. while True:
  54. try:
  55. sshstat = subprocess.Popen(sshcommand, stdout=subprocess.PIPE,
  56. stderr=subprocess.PIPE)
  57. log = sshstat.communicate()
  58. if sshstat.returncode != 0:
  59. print "Executing SSH command on {0} failed: {1}".format(self.host, log)
  60. print "\nRetrying SSH command one more time!"
  61. if i >= 3:
  62. break
  63. i += 1
  64. time.sleep(10)
  65. continue
  66. break
  67. except:
  68. print "Could not SSH to {0}, waiting for it to start".format(self.host)
  69. i += 1
  70. time.sleep(10)
  71. if i >= 3:
  72. print "Could not execute remote ssh command: " + ' '.join(sshcommand)
  73. raise Exception("Could not connect to {0}. Giving up with erros: {1}".format(self.host, log))
  74. errorMsg = log[1]
  75. if self.errorMessage and sshstat.returncode != 0:
  76. errorMsg = self.errorMessage + "\n" + errorMsg
  77. print "SSH command execution finished"
  78. return {"exitstatus": sshstat.returncode, "log": log, "errormsg": errorMsg}
  79. class SCP:
  80. """
  81. SCP implementation that is thread based. The status can be returned using
  82. status val
  83. """
  84. def __init__(self, user, sshkey_file, host, inputFile, remote, errorMessage = None):
  85. self.user = user
  86. self.sshkey_file = sshkey_file
  87. self.host = host
  88. self.inputFile = inputFile
  89. self.remote = remote
  90. self.errorMessage = errorMessage
  91. def run(self):
  92. scpcommand = ["scp",
  93. "-r",
  94. "-o", "ConnectTimeout=60",
  95. "-o", "BatchMode=yes",
  96. "-o", "StrictHostKeyChecking=no",
  97. "-i", self.sshkey_file, self.inputFile, self.user + "@" +
  98. self.host + ":" + self.remote]
  99. i = 1
  100. while True:
  101. try:
  102. scpstat = subprocess.Popen(scpcommand, stdout=subprocess.PIPE,
  103. stderr=subprocess.PIPE)
  104. log = scpstat.communicate()
  105. if scpstat.returncode != 0:
  106. print "Executing SCP command on {0} failed: {1}".format(self.host, log)
  107. print "\nRetrying SCP command one more time!"
  108. if i >= 3:
  109. break
  110. i += 1
  111. time.sleep(10)
  112. continue
  113. break
  114. except:
  115. print "Could not SCP to {0}, waiting for it to start".format(self.host)
  116. i += 1
  117. time.sleep(10)
  118. if i >= 3:
  119. print "Could not execute remote scp command: " + ' '.join(scpcommand)
  120. raise Exception("Could not connect to {0}. Giving up with erros: {1}".format(self.host, log))
  121. errorMsg = log[1]
  122. if self.errorMessage and scpstat.returncode != 0:
  123. errorMsg = self.errorMessage + "\n" + errorMsg
  124. print "SCP command execution finished"
  125. return {"exitstatus": scpstat.returncode, "log": log, "errormsg": errorMsg}
  126. # main method to parse arguments from user and start work
  127. def main():
  128. parser = argparse.ArgumentParser(
  129. description='This script brings up a cluster with ambari installed, configured and started',
  130. epilog='Only GCE is supported as of now!'
  131. )
  132. # options
  133. parser.add_argument('--controller', type=str,
  134. action='store', help='GCE controller ip address.')
  135. parser.add_argument('--key', type=str,
  136. action='store', help='Path to GCE ssh key.')
  137. parser.add_argument('--cluster-suffix', type=str,
  138. action='store', help='Cluster name suffix.')
  139. parser.add_argument('--agent-prefix', type=str,
  140. action='store', help='Agent name prefix.')
  141. parser.add_argument('--agents-count', type=int,
  142. action='store', help='Agents count for whole cluster (multiples of 50).')
  143. if len(sys.argv) <= 1:
  144. parser.print_help()
  145. sys.exit(-1)
  146. args = parser.parse_args()
  147. do_work(args)
  148. def do_work(args):
  149. """
  150. Check that all required args are passed in. If so, deploy the cluster.
  151. :param args: Command line args
  152. """
  153. if not args.controller:
  154. raise Exception("GCE controller ip address is not defined!")
  155. if not args.key:
  156. raise Exception("Path to gce ssh key is not defined!")
  157. if not args.cluster_suffix:
  158. raise Exception("Cluster name suffix is not defined!")
  159. if not args.agent_prefix:
  160. raise Exception("Agent name prefix is not defined!")
  161. if not args.agents_count:
  162. raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!")
  163. deploy_cluster(args)
  164. def deploy_cluster(args):
  165. """
  166. Process cluster deployment
  167. :param args: Command line args.
  168. """
  169. # When dividing, need to get the ceil.
  170. number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1
  171. # In case of an error after creating VMs, can simply comment out this function to run again without creating VMs.
  172. create_vms(args, number_of_nodes)
  173. # getting list of vms information like hostname and ip address
  174. print "Getting list of virtual machines from cluster..."
  175. # Dictionary from host name to IP
  176. (server_dict, agents_dict) = get_vms_list(args)
  177. # check number of nodes in cluster to be the same as user asked
  178. print "Checking count of created nodes in cluster..."
  179. if not agents_dict or len(agents_dict) < number_of_nodes:
  180. raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(agents_dict)))
  181. print "GCE cluster was successfully created!\n"
  182. # installing/starting ambari-server and ambari-agents on each host
  183. server_item = server_dict.items()[0]
  184. server_host_name = server_item[0]
  185. server_ip = server_item[1]
  186. print "=========================="
  187. print "Server Hostname: %s" % server_host_name
  188. print "Server IP: %s" % server_ip
  189. print "==========================\n"
  190. # Sort the agents by hostname into a list.
  191. sorted_agents = sort_hosts(agents_dict)
  192. pretty_print_vms(sorted_agents)
  193. print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server)..."
  194. create_server_script(server_host_name)
  195. print "Creating agent.sh script (which will be executed on agent hosts to install/configure/start ambari-agent..."
  196. create_agent_script(server_host_name)
  197. time.sleep(10)
  198. prepare_server(args, server_host_name, server_ip)
  199. # If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead
  200. # of 50 on every VM.
  201. num_agents_left_to_create = args.agents_count
  202. start_num = 1
  203. for (hostname, ip) in sorted_agents:
  204. num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST)
  205. print "=========================="
  206. print "Working on VM {0} that will contain hosts {1} - {2}".format(hostname, start_num, start_num + num_agents_on_this_host - 1)
  207. # The agent multiplier config will be different on each VM.
  208. cmd_generate_multiplier_conf = "mkdir -p /etc/ambari-agent/conf/ ; printf \"start={0}\\nnum={1}\\nprefix={2}\" > /etc/ambari-agent/conf/agent-multiplier.conf".format(start_num, num_agents_on_this_host, args.agent_prefix)
  209. start_num += num_agents_on_this_host
  210. num_agents_left_to_create -= num_agents_on_this_host
  211. prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf)
  212. pass
  213. print "All scripts where successfully copied and started on all hosts. " \
  214. "\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!"
  215. def create_vms(args, number_of_nodes):
  216. """
  217. Request the server and VMs for the agents from GCE.
  218. :param args: Command line args
  219. :param number_of_nodes: Number of VMs to request.
  220. """
  221. print "Creating server VM {0}-server-{1} with xxlarge nodes on centos6...".format(cluster_prefix, args.cluster_suffix)
  222. execute_command(args, args.controller, "/usr/sbin/gce up {0}-server-{1} 1 --centos6 --xxlarge --ex --disk-xxlarge --ssd".format(cluster_prefix, args.cluster_suffix),
  223. "Failed to create server, probably not enough resources!", "-tt")
  224. time.sleep(10)
  225. # trying to create cluster with needed params
  226. print "Creating agent VMs {0}-agent-{1} with {2} xlarge nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes))
  227. execute_command(args, args.controller, "/usr/sbin/gce up {0}-agent-{1} {2} --centos6 --xlarge --ex --disk-xlarge".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)),
  228. "Failed to create cluster VMs, probably not enough resources!", "-tt")
  229. # VMs are not accessible immediately
  230. time.sleep(10)
  231. def prepare_server(args, hostname, ip):
  232. remote_path = "/server.sh"
  233. local_path = "server.sh"
  234. print "Copying server.sh to {0}...".format(hostname)
  235. put_file(args, ip, local_path, remote_path, "Failed to copy file!")
  236. print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname)
  237. execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &",
  238. "Install/configure/start server script failed!")
  239. def prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf):
  240. remote_path = "/agent.sh"
  241. local_path = "agent.sh"
  242. print "Copying agent.sh to {0}...".format(hostname)
  243. put_file(args, ip, local_path, remote_path, "Failed to copy file!")
  244. print "Generating agent-multiplier.conf"
  245. execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname))
  246. print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname)
  247. execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &",
  248. "Install/configure start agent script failed!")
  249. def create_server_script(server_host_name):
  250. """
  251. Creating server.sh script in the same dir where current script is located
  252. server.sh script will install, configure and start ambari-server and ambari-agent on host
  253. :param server_host_name: Server host name
  254. """
  255. # ambari-server setup <options> may not work property, so doing several calls like
  256. # echo "arg=value" >> .../ambari.properties
  257. contents = "#!/bin/bash\n" + \
  258. "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
  259. "yum clean all; yum install git ambari-server -y\n" + \
  260. "mkdir /home ; cd /home ; git clone https://github.com/apache/ambari.git ; cd ambari ; git checkout branch-2.5\n" + \
  261. "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \
  262. "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
  263. "sed -i -f /home/ambari/ambari-server/src/main/resources/stacks/PERF/install_packages.sed /var/lib/ambari-server/resources/custom_actions/scripts/install_packages.py\n" + \
  264. "sed -i -f /home/ambari/ambari-server/src/main/resources/stacks/PERF/install_packages.sed /var/lib/ambari-agent/cache/custom_actions/scripts/install_packages.py\n" + \
  265. "\n" + \
  266. "\n" + \
  267. "cd /; wget http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.40/mysql-connector-java-5.1.40.jar;\n" + \
  268. "mkdir /usr/share/java; chmod 777 /usr/share/java;" + \
  269. "cp mysql-connector-java-5.1.40.jar /usr/share/java/; chmod 777 /usr/share/java/mysql-connector-java-5.1.40.jar;\n" + \
  270. "ln -s /usr/share/java/mysql-connector-java-5.1.40.jar /usr/share/java/mysql-connector-java.jar;\n" + \
  271. "cd /etc/yum.repos.d/; wget http://repo.mysql.com/mysql-community-release-el6-5.noarch.rpm; rpm -ivh mysql-community-release-el6-5.noarch.rpm;" + \
  272. "yum clean all; yum install mysql-server -y\n" + \
  273. "sed -i -e 's/mysqld]/mysqld]\\nmax_allowed_packet=1024M\\njoin_buffer_size=512M\\nsort_buffer_size=128M\\nread_rnd_buffer_size=128M\\ninnodb_buffer_pool_size=16G" \
  274. "\\ninnodb_file_io_threads=16\\ninnodb_thread_concurrency=32\\nkey_buffer_size=16G\\nquery_cache_limit=16M\\nquery_cache_size=512M\\nthread_cache_size=128\\ninnodb_log_buffer_size=512M/1' /etc/my.cnf\n" + \
  275. "service mysqld start\n" + \
  276. "mysql -uroot -e \"CREATE DATABASE ambari;\"\n" + \
  277. "mysql -uroot -e \"SOURCE /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql;\" ambari\n" + \
  278. "mysql -uroot -e \"CREATE USER 'ambari'@'%' IDENTIFIED BY 'bigdata';\"\n" + \
  279. "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'%%';\"\n" + \
  280. "mysql -uroot -e \"CREATE USER 'ambari'@'localhost' IDENTIFIED BY 'bigdata';\"\n" + \
  281. "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'localhost';\"\n" + \
  282. "mysql -uroot -e \"CREATE USER 'ambari'@'{0}' IDENTIFIED BY 'bigdata';\"\n".format(server_host_name) + \
  283. "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'{0}';\"\n".format(server_host_name) + \
  284. "mysql -uroot -e \"FLUSH PRIVILEGES;\"\n" + \
  285. "\n" + \
  286. "\n" + \
  287. "ambari-server setup -s\n" + \
  288. "ambari-server setup --database mysql --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar --databasehost=localhost --databaseport=3306 --databasename=ambari --databaseusername=ambari --databasepassword=bigdata\n" + \
  289. "sed -i -e 's/=postgres/=mysql/g' /etc/ambari-server/conf/ambari.properties\n" + \
  290. "sed -i -e 's/server.persistence.type=local/server.persistence.type=remote/g' /etc/ambari-server/conf/ambari.properties\n" + \
  291. "sed -i -e 's/local.database.user=postgres//g' /etc/ambari-server/conf/ambari.properties\n" + \
  292. "sed -i -e 's/server.jdbc.postgres.schema=ambari//g' /etc/ambari-server/conf/ambari.properties\n" + \
  293. "sed -i -e 's/agent.threadpool.size.max=25/agent.threadpool.size.max=100/g' /etc/ambari-server/conf/ambari.properties\n" + \
  294. "sed -i -e 's/client.threadpool.size.max=25/client.threadpool.size.max=65/g' /etc/ambari-server/conf/ambari.properties\n" + \
  295. "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \
  296. "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/2.0/metainfo.xml\n" + \
  297. "sed -i -e 's/-Xmx2048m/-Xmx16384m/g' /var/lib/ambari-server/ambari-env.sh\n" + \
  298. "\n" + \
  299. "echo 'server.jdbc.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \
  300. "echo 'server.jdbc.rca.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \
  301. "echo 'server.jdbc.rca.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \
  302. "echo 'server.jdbc.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \
  303. "echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties\n" + \
  304. "echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties\n" + \
  305. "echo 'server.jdbc.driver.path=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties\n" + \
  306. "echo 'alerts.cache.enabled=true' >> /etc/ambari-server/conf/ambari.properties\n" + \
  307. "echo 'alerts.cache.size=100000' >> /etc/ambari-server/conf/ambari.properties\n" + \
  308. "echo 'alerts.execution.scheduler.maxThreads=4' >> /etc/ambari-server/conf/ambari.properties\n" + \
  309. "echo 'security.temporary.keystore.retention.minutes=180' >> /etc/ambari-server/conf/ambari.properties\n" + \
  310. "\n" + \
  311. "ambari-server start --skip-database-check\n" + \
  312. "exit 0"
  313. with open("server.sh", "w") as f:
  314. f.write(contents)
  315. def create_agent_script(server_host_name):
  316. """
  317. Creating agent.sh script in the same dir where current script is located
  318. agent.sh script will install, configure and start ambari-agent on host
  319. :param server_host_name: Server host name
  320. """
  321. # TODO, instead of cloning Ambari repo on each VM, do it on the server once and distribute to all of the agents.
  322. contents = "#!/bin/bash\n" + \
  323. "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \
  324. "yum clean all; yum install krb5-workstation git ambari-agent -y\n" + \
  325. "mkdir /home ; cd /home; git clone https://github.com/apache/ambari.git ; cd ambari ; git checkout branch-2.5\n" + \
  326. "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \
  327. "sed -i -f /var/lib/ambari-agent/cache/stacks/PERF/PythonExecutor.sed /usr/lib/python2.6/site-packages/ambari_agent/PythonExecutor.py\n" + \
  328. "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \
  329. "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \
  330. "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \
  331. "exit 0"
  332. with open("agent.sh", "w") as f:
  333. f.write(contents)
  334. def execute_command(args, ip, cmd, fail_message, custom_option='', login='root'):
  335. """
  336. Method to execute ssh commands via SSH class
  337. :param args: Command line args
  338. :param ip: IP to ssh to
  339. :param cmd: Command to execute
  340. :param fail_message: In case of an error, what to report
  341. :param custom_option: Custom flags
  342. :param login: Login user
  343. :return: Return execute log message
  344. """
  345. ssh = SSH(login, args.key, ip, cmd, custom_option, fail_message)
  346. ssh_result = ssh.run()
  347. status_code = ssh_result["exitstatus"]
  348. if status_code != 0:
  349. raise Exception(ssh_result["errormsg"])
  350. return ssh_result["log"][0]
  351. def put_file(args, ip, local_file, remote_file, fail_message, login='root'):
  352. """
  353. Method to copy file from local to remote host via SCP class
  354. :param args: Command line args
  355. :param ip: IP to ssh to
  356. :param local_file: Path to local file
  357. :param remote_file: Path to remote file
  358. :param fail_message: In case of an error, what to report
  359. :param login: Login user.
  360. :return: Return copy log message
  361. """
  362. scp = SCP(login, args.key, ip, local_file,
  363. remote_file, fail_message)
  364. scp_result = scp.run()
  365. status_code = scp_result["exitstatus"]
  366. if status_code != 0:
  367. raise Exception(scp_result["errormsg"])
  368. return scp_result["log"][0]
  369. def get_vms_list(args):
  370. """
  371. Get tuple of (x, y) where
  372. x = dictionary from single server host name to ip
  373. y = dictionary from multiple agent host names to ip
  374. :param args: Command line arguments
  375. :return: Tuple of dictionaries of hostnames and ip for server and agents.
  376. """
  377. # Get the server.
  378. server = __get_vms_list_from_name(args, "{0}-server-{1}".format(cluster_prefix, args.cluster_suffix))
  379. # Get the agents
  380. agents = __get_vms_list_from_name(args, "{0}-agent-{1}".format(cluster_prefix, args.cluster_suffix))
  381. return (server, agents)
  382. def __get_vms_list_from_name(args, cluster_name):
  383. """
  384. Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster
  385. :param args: Command line args
  386. :return: Mapping of VM host name to ip.
  387. """
  388. gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}'.format(cluster_name)
  389. out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt")
  390. lines = out.split('\n')
  391. #print "LINES=" + str(lines)
  392. if lines[0].startswith("Using profile") and not lines[1].strip():
  393. result = {}
  394. for s in lines[2:]: # Ignore non-meaningful lines
  395. if not s:
  396. continue
  397. match = re.match(r'^([\d\.]*)\s+([\w\.-]*)\s+([\w\.-]*)\s$', s, re.M)
  398. if match:
  399. result[match.group(2)] = match.group(1)
  400. else:
  401. raise Exception('Cannot parse "{0}"'.format(s))
  402. return result
  403. else:
  404. raise Exception('Cannot parse "{0}"'.format(lines))
  405. def sort_hosts(hosts):
  406. """
  407. Sort the hosts by name and take into account the numbers.
  408. :param hosts: Dictionary from host name (e.g., perf-9-test, perf-62-test), to the IP
  409. :return: Sorted list of tuples
  410. """
  411. host_names = hosts.keys()
  412. sorted_host_tuples = [(None, None),] * len(hosts)
  413. pattern = re.compile(".*?-agent-.*?(\d+)")
  414. for host_name in host_names:
  415. m = pattern.match(host_name)
  416. if m and len(m.groups()) == 1:
  417. number = int(m.group(1))
  418. ip = hosts[host_name]
  419. sorted_host_tuples[number - 1] = (host_name, ip)
  420. return sorted_host_tuples
  421. def pretty_print_vms(vms):
  422. """
  423. Pretty print the VMs hostnames
  424. :param vms: List of tuples (hostname, ip)
  425. """
  426. print "=========================="
  427. print "Hostnames of nodes in cluster:"
  428. for (hostname, ip) in vms:
  429. print hostname
  430. print "==========================\n"
  431. if __name__ == "__main__":
  432. main()