finalizeNodes.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. <?php
  2. include_once '../util/Logger.php';
  3. include_once '../conf/Config.inc';
  4. include_once 'localDirs.php';
  5. include_once "../util/lock.php";
  6. include_once "../util/util.php";
  7. include_once '../db/HMCDBAccessor.php';
  8. include_once "../util/HMCTxnUtils.php";
  9. include_once 'commandUtils.php';
  // Shared DB accessor; the script body below uses it to update the
  // sub-transaction op status and the per-host discovery status.
  // NOTE(review): DB_PATH is presumably defined by ../conf/Config.inc - confirm.
  10. $dbAccessor = new HMCDBAccessor($GLOBALS["DB_PATH"]);
  /**
   * Scan command output for a regular-expression pattern.
   *
   * Entries of $output are read by integer index 0 .. count($output)-1 and
   * tested against /$pattern/ (the pattern is used as a regex, unescaped).
   *
   * @param array  $output  list of output lines
   * @param string $pattern regex body (without delimiters) to look for
   * @param int    $ret     value to return when the pattern is found:
   *                        pass 0 for a success pattern, 1 for an error pattern
   * @return int $ret when any line matches, otherwise the opposite flag
   *             (($ret + 1) % 2)
   *
   * NOTE(review): the previous header comment said an empty pattern returns
   * $output['retcode']; the code has no such branch - an empty pattern simply
   * matches the first line, so $ret is returned for any non-empty output.
   */
  function check_error ($output, $pattern, $ret) {
    $noMatchCode = ($ret + 1) % 2;
    $idx = 0;
    while ($idx < count($output)) {
      $found = preg_match ("/$pattern/", $output[$idx]);
      if ($found) {
        return $ret;
      }
      $idx++;
    }
    return $noMatchCode;
  }
  /**
   * Sign pending puppet agent certificates on the master and verify that each
   * agent responds to "puppet kick --ping".
   *
   * Steps:
   *   1. List certificates already signed on the master and mark those hosts.
   *   2. Poll the unsigned-certificate list (up to $waitLoops times, sleeping
   *      $sleepInterval seconds between polls) and sign each requested host
   *      as its request shows up.
   *   3. Re-read the signed list to pick up hosts that became signed meanwhile.
   *   4. Run "puppet kick --ping" in batches against every host that has no
   *      recorded signing failure, and record the per-host exit code.
   *
   * @param array  $hosts  host names to process (surrounding whitespace is trimmed)
   * @param object $logger logger exposing log_info / log_debug / log_error
   * @return array map of host name => array("discoveryStatus" => "SUCCESS"|"FAILED",
   *               "badHealthReason" => string)
   */
  function sign_and_verify_agent ($hosts, $logger) {
    // Normalize once so that every lookup below uses the same key; the
    // state map used to be seeded with untrimmed names but updated with
    // trimmed ones.
    $hosts = array_map('trim', $hosts);
    $origHosts = $hosts;
    $totalCnt = count($hosts);
    $output = array();

    // Host names are matched as literal text (dots escaped) and must not be
    // embedded in a longer label, so "node1" no longer matches "node10".
    // A trailing domain suffix is still accepted, as the previous substring
    // match allowed.
    $nameStart = '(?<![\w.-])';
    $nameEnd = '(?![\w-])';
    $hostsState = array();
    $hostPatterns = array();
    foreach ($hosts as $host) {
      $hostsState[$host] = FALSE;
      $hostPatterns[$host] = '/' . $nameStart . preg_quote($host, '/') . $nameEnd . '/';
    }

    $listSignedCmd = "puppet cert --confdir=/etc/puppet/master list --all | grep \"^+ \"";
    $listUnsignedCmd = "puppet cert --confdir=/etc/puppet/master list --all | grep -v \"^+ \"";

    // Step 1: hosts whose certificate is already signed need no further signing.
    $logger->log_info("Getting puppet master list to find all signed agents");
    $signed_hosts = array();
    exec ($listSignedCmd, $signed_hosts, $err);
    foreach ($hosts as $i => $host) {
      foreach ($signed_hosts as $line) {
        if (preg_match ($hostPatterns[$host], $line)) {
          unset($hosts[$i]);
          $hostsState[$host] = TRUE;
          break;
        }
      }
    }

    // Step 2: wait for the remaining agents to submit their requests and sign them.
    $waitLoops = 10;
    $sleepInterval = 3;
    $logger->log_info("Looping through until all puppet agents are signed");
    for ($waitLoop = 0; $waitLoop < $waitLoops; $waitLoop++) {
      $waitSecs = $waitLoop * $sleepInterval;
      if ($waitLoop > 0) {
        $logger->log_info("Waited " . $waitSecs . " seconds for puppet cert sign"
            . ", hostsRemaining=" . count($hosts)
            . ", totalHosts=" . $totalCnt
            . ", totalWaitedTimeSeconds=" . $waitSecs);
      }
      $unsigned_hosts = array();
      exec ($listUnsignedCmd, $unsigned_hosts, $err);
      foreach ($hosts as $i => $host) {
        foreach ($unsigned_hosts as $line) {
          if (!preg_match ($hostPatterns[$host], $line)) {
            continue;
          }
          $logger->log_debug("Signing certificate for ".$host."\n");
          $out_arr = array();
          // The host name comes from the hosts file; quote it for the shell.
          $cmd = "puppet cert --confdir=/etc/puppet/master sign " . escapeshellarg($host);
          exec ($cmd, $out_arr, $retcode);
          if ($retcode != 0) {
            $logger->log_error("Failed to sign cert for host " . $host);
            $output[$host] =
              array ( "discoveryStatus" => "FAILED",
                      "badHealthReason" => "Puppet cert sign failed: " . implode(";", $out_arr));
            $hostsState[$host] = FALSE;
          } else {
            $logger->log_info("Puppet cert sign succeeded for host " . $host);
            $hostsState[$host] = TRUE;
            unset($output[$host]);
          }
          // Whether signing worked or not, this host is no longer waited for.
          unset($hosts[$i]);
          break;
        }
      }
      if (empty($hosts)) {
        break;
      }
      sleep($sleepInterval);
    }
    // The loop only runs to completion when some host never showed up.
    if ($waitLoop >= $waitLoops) {
      $logger->log_error("Timed out waiting for all puppet agents to ping master");
    }

    // Step 3: re-check if the hosts are now signed.
    $logger->log_info("Re-checking to ensure all puppet hosts are signed");
    $signed_hosts = array();
    exec ($listSignedCmd, $signed_hosts, $err);
    foreach ($hostsState as $hostName => $state) {
      foreach ($signed_hosts as $line) {
        if (preg_match ($hostPatterns[$hostName], $line)) {
          // Update the host that matched; this used to update a stale $host
          // left over from the wait loop.
          $logger->log_info("Puppet cert signed for host " . $hostName);
          $hostsState[$hostName] = TRUE;
          unset($output[$hostName]);
          break;
        }
      }
    }

    $countSucceeded = count(array_filter($hostsState));
    $countFailed = count($hostsState) - $countSucceeded;
    $logger->log_info("Puppet cert sign status"
        . ", totalHosts=" . $totalCnt
        . ", succeededHostsCount=" . $countSucceeded
        . ", failedHostsCount=" . $countFailed);

    sleep(5);

    // Step 4: run kick ping in batches of 10 hosts, skipping hosts that
    // already have a recorded signing failure.
    $kickCandidates = array();
    foreach ($origHosts as $host) {
      if (!array_key_exists ($host, $output)) {
        $kickCandidates[] = $host;
      }
    }
    $hostsToKick = array_chunk($kickCandidates, 10);

    foreach ($hostsToKick as $idx => $hostKickList) {
      $hostList = implode(",", $hostKickList);
      /* Give puppet kick --ping to check if agent is working */
      $logger->log_debug("Puppet kick --ping for batch $idx , hosts=".$hostList."\n");
      $hostListStr = "";
      foreach ($hostKickList as $hostToKick) {
        $hostListStr .= " --host " . escapeshellarg($hostToKick);
      }
      $out_arr = array();
      $cmd = "puppet kick -f --parallel 10 --ping $hostListStr 2>/dev/null";
      exec ($cmd, $out_arr, $err);
      // TODO do we need to check $err ?

      // Collect, per host, the output lines mentioning it and its exit code.
      $pHostOutput = array();
      $pHostResponse = array();
      foreach ($out_arr as $line) {
        foreach ($hostKickList as $host) {
          if (!preg_match ($hostPatterns[$host], $line)) {
            continue;
          }
          if (!isset($pHostOutput[$host])) {
            $pHostOutput[$host] = array();
          }
          $pHostOutput[$host][] = $line;
          $exitPattern = '/' . $nameStart . preg_quote($host, '/')
              . ' finished with exit code (\d+)/';
          $matches = array();
          if (preg_match($exitPattern, $line, $matches) > 0) {
            $pHostResponse[$host] = (int)$matches[1];
          }
        }
      }
      $logger->log_debug("Output for batch $idx, outputLogs="
          . print_r($pHostOutput, true) . " , errorCodes="
          . print_r($pHostResponse, true) );

      foreach ($hostKickList as $host) {
        if (isset($pHostResponse[$host])
            && $pHostResponse[$host] == 0) {
          $logger->log_info("Puppet kick succeeded for host " . $host);
          $hostsState[$host] = TRUE;
          unset($output[$host]);
        } else {
          $logger->log_error("Failed to do puppet kick -ping on host " . $host);
          // -1 means no exit code was reported for this host at all.
          $errorCode = -1;
          if (isset($pHostResponse[$host])) {
            $errorCode = $pHostResponse[$host];
          }
          $errorLogs = "Puppet kick failed";
          if (isset($pHostOutput[$host])) {
            $errorLogs = implode(";", $pHostOutput[$host]);
          }
          if (!isset($output[$host])) {
            $output[$host] =
              array ( "discoveryStatus" => "FAILED",
                      "badHealthReason" => "Puppet kick failed: "
                          . ", error=" . $errorCode
                          . ", outputLogs=" . $errorLogs);
          }
          $hostsState[$host] = FALSE;
        }
      }
    }

    $countSucceeded = count(array_filter($hostsState));
    $countFailed = count($hostsState) - $countSucceeded;
    $logger->log_info("Puppet kick status"
        . ", totalHosts=" . $totalCnt
        . ", succeededHostsCount=" . $countSucceeded
        . ", failedHostsCount=" . $countFailed);

    // Build the per-host result; a failed host without a recorded reason is
    // reported as a cert-sign timeout.
    $response = array();
    foreach ($hostsState as $host => $state) {
      if ($state) {
        $response[$host] = array ( "discoveryStatus" => "SUCCESS",
                                   "badHealthReason" => "");
      } else if (isset($output[$host])) {
        $response[$host] = $output[$host];
      } else {
        $logger->log_error("Timed out waiting for puppet agent on host " . $host);
        $response[$host] = array ( "discoveryStatus" => "FAILED",
                                   "badHealthReason" => "Puppet cert sign timed out");
      }
    }
    return $response;
  }
  // ---------------------------------------------------------------------------
  // Script body.
  // Arguments: <clusterName> <deployUser> <rootTxnId> <mySubTxnId>
  //            <parentSubTxnId> <hostsFile>
  // Signs puppet agent certs for the hosts listed in <hostsFile>, writes
  // per-host progress files, rewrites <hostsFile> with only the hosts that
  // succeeded, and records the outcome in the DB.
  // ---------------------------------------------------------------------------
  if (!isset($argv) || count($argv) < 7) {
    // Report in the same result/error shape this script prints for DB errors.
    print json_encode(array(
        "result" => 1,
        "error" => "Usage: finalizeNodes.php <clusterName> <deployUser> <rootTxnId>"
            . " <mySubTxnId> <parentSubTxnId> <hostsFile>"));
    return;
  }

  $clusterName = $argv[1];
  $deployUser = $argv[2];
  $rootTxnId = $argv[3];
  $mySubTxnId = $argv[4];
  $parentSubTxnId = $argv[5];
  $readFromFile = $argv[6];

  $hosts = readHostsFile($readFromFile);
  $hosts = convertToLowerCase($hosts);
  $totalHosts = count($hosts);

  $logger = new HMCLogger("PuppetFinalize:txnId="
      . $rootTxnId . ":subTxnId=" . $mySubTxnId);
  $logger->log_info("Starting signing of puppet agents certs for "
      . count($hosts) . " hosts");

  $opStatus = "STARTED";
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }

  // Create progress files for UI to track: start from an empty directory and
  // create one empty <host>.out file per host.
  $operationName = "finalizeNodes";
  $clusterDir = getClusterDir($clusterName);
  $myDir = $clusterDir . $operationName . "/";
  if (is_dir($myDir)) {
    rrmdir($myDir);
  }
  if (!mkdir($myDir)) {
    // Best effort: progress files are for tracking only, so keep going.
    $logger->log_error("Failed to create progress directory " . $myDir);
  }
  foreach ($hosts as $host) {
    $fileName = $myDir . "/" . $host . ".out";
    $h = fopen($fileName, "a");
    if ($h !== FALSE) {
      fclose($h);
    }
  }

  $result = sign_and_verify_agent ($hosts,$logger);
  $logger->log_debug("Puppet Cert Sign Result:\n".print_r($result, true));

  // The hosts file is rewritten to contain only the hosts that succeeded.
  $nodeFileOut = fopen($readFromFile, "w");
  if ($nodeFileOut === FALSE) {
    $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
    $logger->log_error("Got error while trying to rewrite hosts file");
    return;
  }

  $updateHosts = array();
  $failedHosts = 0;
  $successfulHosts = 0;
  foreach ($result as $hostName => $hostInfo) {
    // <host>.done holds "1" on failure and "0" on success; <host>.err holds
    // the failure reason.
    $fileName = $myDir . "/" . $hostName . ".done";
    $errFileName = $myDir . "/" . $hostName . ".err";
    if ($hostInfo["discoveryStatus"] == "FAILED") {
      $updateHosts[$hostName] = $hostInfo;
      $errorString = $hostInfo["badHealthReason"];
      // file_put_contents replaces a manual fwrite loop that could spin
      // forever when fwrite kept returning 0.
      if (file_put_contents($errFileName, $errorString) === FALSE) {
        $logger->log_error("Failed to write error file for puppet cert sign failure"
            . ", host=" . $hostName
            . ", errFile=" . $errFileName
            . ", error=" . $errorString);
      }
      $doneMarker = "1\n";
      $failedHosts++;
    } else {
      $doneMarker = "0\n";
      // write the nodename to the readFromFile file.
      fwrite($nodeFileOut, $hostName."\n");
      $successfulHosts++;
    }
    // Written directly instead of through a shell "echo" so the host-derived
    // path is never interpreted by the shell; the content is the same.
    if (file_put_contents($fileName, $doneMarker) === FALSE) {
      $logger->log_error("Failed to write status file " . $fileName);
    }
  }
  fclose($nodeFileOut);

  $logger->log_debug("Updating DB for hosts discovery status for puppet agent cert signing");
  $ret = $dbAccessor->updateHostDiscoveryStatus($clusterName, $updateHosts);
  if ($ret["result"] != 0) {
    $logger->log_error("Failed to update DB for hosts status, error="
        . $ret["error"]);
    // TODO - handle failure?
  }

  // SUCCESS when nothing failed (or there were no hosts), TOTALFAILURE when
  // no host succeeded, FAILED when only some hosts failed.
  $opStatus = "SUCCESS";
  if ($totalHosts > 0) {
    if ($successfulHosts == 0) {
      $opStatus = "TOTALFAILURE";
    } else if ($failedHosts > 0) {
      $opStatus = "FAILED";
    }
  }
  $logger->log_info("Puppet finalize, succeeded for " . $successfulHosts
      . " and failed for " . $failedHosts . " of total " . $totalHosts . " hosts");

  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }
  $logger->log_info("Completed signing of certs for puppet agents, opStatus=" . $opStatus);
  346. ?>