finalizeNodes.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. <?php
  2. include_once '../util/Logger.php';
  3. include_once '../conf/Config.inc';
  4. include_once 'localDirs.php';
  5. include_once "../util/lock.php";
  6. include_once "../util/util.php";
  7. include_once '../db/HMCDBAccessor.php';
  8. include_once "../util/HMCTxnUtils.php";
  9. include_once 'commandUtils.php';
  // Script-wide DB accessor, constructed with the path stored in $GLOBALS["DB_PATH"]
  // (presumably set by the included Config.inc -- confirm). The top-level script below
  // uses it to update the sub-transaction status and the hosts' discovery status.
  10. $dbAccessor = new HMCDBAccessor($GLOBALS["DB_PATH"]);
  /*
   * Scan command output for a pattern and translate the outcome into a 0/1 code.
   *
   * $output  - list of output lines to scan
   * $pattern - regular expression body; it is wrapped in "/" delimiters as-is
   * $ret     - value to return as soon as any line matches
   *            (pass 0 with a success pattern, 1 with an error pattern)
   *
   * Returns $ret when at least one line matches, otherwise the opposite
   * flag, ($ret + 1) % 2.
   *
   * Note: an empty pattern matches every line, so with non-empty output
   * the function simply returns $ret.
   */
  function check_error ($output, $pattern, $ret) {
    $regex = "/$pattern/";
    foreach ($output as $line) {
      if (preg_match($regex, $line)) {
        return $ret;
      }
    }
    // No line matched: flip 0 <-> 1.
    return ($ret + 1) % 2;
  }
  /*
   * Check whether a TCP connection to $host:$port can be opened.
   *
   * $host    - host name or address to connect to
   * $port    - TCP port, defaults to 8139
   * $timeout - connection timeout in seconds, defaults to 10
   * $errstr  - (out) error message filled in by fsockopen() on failure
   * $errno   - (out) error number filled in by fsockopen() on failure; it
   *            can stay 0 when the failure happens before the connect
   *            call, so callers must rely on the return value, not on
   *            $errno, to detect failure
   *
   * Returns TRUE when the connection was opened (it is closed again
   * immediately), FALSE otherwise.
   *
   * The by-reference outputs default to null so that the optional $port
   * and $timeout are not followed by required parameters (deprecated as
   * of PHP 8); existing five-argument calls are unaffected.
   */
  function ping($host, $port=8139, $timeout=10, &$errstr=null, &$errno=null)
  {
    $fsock = fsockopen($host, $port, $errno, $errstr, $timeout);
    if ($fsock === FALSE) {
      return FALSE;
    }
    // Only reachability matters; release the socket right away.
    fclose($fsock);
    return TRUE;
  }
  /*
   * Build a regex that matches $host literally inside a line of
   * "puppet cert list" output. Regex metacharacters in the name (notably
   * ".") are escaped, and the name must not be directly preceded or
   * followed by a letter, digit, "_" or "-", so "node1" no longer matches
   * a line that only mentions "node10".
   */
  function puppet_cert_host_regex ($host) {
    return "/(?<![A-Za-z0-9_-])" . preg_quote((string)$host, "/") . "(?![A-Za-z0-9_-])/";
  }

  /* Return TRUE when any of the given cert-list lines mentions $host. */
  function puppet_cert_lines_mention_host ($lines, $host) {
    $regex = puppet_cert_host_regex($host);
    foreach ($lines as $line) {
      if (preg_match($regex, $line)) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /*
   * Run "puppet cert list --all" against the master confdir and return
   * either the lines starting with "+ " ($signedOnly = TRUE) or all the
   * other lines ($signedOnly = FALSE).
   */
  function puppet_cert_list_lines ($signedOnly) {
    $lines = array();
    $cmd = "puppet cert --confdir=/etc/puppet/master list --all | grep "
      . ($signedOnly ? "" : "-v ") . "\"^+ \"";
    exec ($cmd, $lines);
    return $lines;
  }

  /* Log how many hosts are currently marked succeeded / failed. */
  function puppet_log_host_state_counts ($logger, $label, $hostsState, $totalCnt) {
    $countSucceeded = count(array_filter($hostsState));
    $countFailed = count($hostsState) - $countSucceeded;
    $logger->log_info($label
      . ", totalHosts=" . $totalCnt
      . ", succeededHostsCount=" . $countSucceeded
      . ", failedHostsCount=" . $countFailed);
  }

  /*
   * Sign and verify puppet agent.
   *
   * For every host: make sure its certificate is signed on the puppet
   * master (signing pending requests, polling up to 10 times 3 seconds
   * apart), then verify the agent is reachable on TCP port 8139 with up
   * to 3 ping attempts. Hosts whose "puppet cert sign" command failed
   * are not pinged.
   *
   * $hosts  - list of host names; names are trimmed and blank entries
   *           are ignored
   * $logger - object providing log_info / log_debug / log_error
   *
   * Returns an array keyed by host name whose values are
   *   array("discoveryStatus" => "SUCCESS"|"FAILED", "badHealthReason" => string).
   * An empty host list returns an empty array without running any
   * command or sleeping.
   */
  function sign_and_verify_agent ($hosts, $logger) {
    // Normalize once so that every map below is keyed by the same
    // (trimmed) name; previously state was initialised with raw names
    // but updated with trimmed ones.
    $cleanHosts = array();
    foreach ($hosts as $host) {
      $host = trim($host);
      if ($host !== "") {
        $cleanHosts[] = $host;
      }
    }
    $hosts = $cleanHosts;
    if (empty($hosts)) {
      $logger->log_info("No hosts given for sign/verify puppet agent, nothing to do");
      return array();
    }

    $origHosts = $hosts;
    $totalCnt = count($hosts);
    $output = array();      // host => failure details collected so far
    $hostsState = array();  // host => TRUE once considered healthy
    foreach ($hosts as $host) {
      $hostsState[$host] = FALSE;
    }
    $logger->log_info("Starting sign/verify puppet agent for " . $totalCnt
      . " nodes, hosts=" . implode(",", $origHosts));

    // Hosts that are already signed need no further cert handling.
    $logger->log_info("Getting puppet master list to find all signed agents");
    $signedLines = puppet_cert_list_lines(TRUE);
    foreach ($hosts as $i => $host) {
      if (puppet_cert_lines_mention_host($signedLines, $host)) {
        unset($hosts[$i]);
        $hostsState[$host] = TRUE;
      }
    }

    // $hosts now holds only the hosts still waiting for a signature.
    $waitLoops = 10;
    $sleepInterval = 3;
    $logger->log_info("Looping through until all puppet agents are signed");
    for ($waitLoop = 0; $waitLoop < $waitLoops; $waitLoop++) {
      $waitSecs = $waitLoop * $sleepInterval;
      if ($waitLoop > 0) {
        $logger->log_info("Waited " . $waitSecs . " seconds for puppet cert sign"
          . ", hostsRemaining=" . count($hosts)
          . ", totalHosts=" . $totalCnt
          . ", totalWaitedTimeSeconds=" . $waitSecs);
      }
      $unsignedLines = puppet_cert_list_lines(FALSE);
      foreach ($hosts as $i => $host) {
        if (!puppet_cert_lines_mention_host($unsignedLines, $host)) {
          // No request from this agent seen yet; check again next round.
          continue;
        }
        $logger->log_debug("Signing certificate for ".$host."\n");
        $out_arr = array();
        // The host name comes from an input file: quote it for the shell.
        $cmd = "puppet cert --confdir=/etc/puppet/master sign " . escapeshellarg($host);
        exec ($cmd, $out_arr, $retcode);
        if ($retcode != 0) {
          $logger->log_error("Failed to sign cert for host " . $host);
          $output[$host] =
            array ( "discoveryStatus" => "FAILED",
                    "badHealthReason" => "Puppet cert sign failed: " . implode(";", $out_arr));
          $hostsState[$host] = FALSE;
        } else {
          $logger->log_info("Puppet cert sign succeeded for host " . $host);
          $hostsState[$host] = TRUE;
          unset($output[$host]);
        }
        // Signing is attempted once per host, whatever the outcome.
        unset($hosts[$i]);
      }
      if (empty($hosts)) {
        break;
      }
      sleep($sleepInterval);
    }
    if (!empty($hosts)) {
      // The loop ran out of attempts with hosts still unsigned.
      $logger->log_error("Timed out waiting for all puppet agents to ping master");
    }

    // re-check if the hosts are now signed
    $logger->log_info("Re-checking to ensure all puppet hosts are signed");
    $signedLines = puppet_cert_list_lines(TRUE);
    foreach (array_keys($hostsState) as $hostName) {
      if (puppet_cert_lines_mention_host($signedLines, $hostName)) {
        // Use the host being checked here; the old code updated a
        // stale $host variable left over from an earlier loop.
        $logger->log_info("Puppet cert signed for host " . $hostName);
        $hostsState[$hostName] = TRUE;
        unset($output[$hostName]);
      }
    }
    puppet_log_host_state_counts($logger, "Puppet cert sign status", $hostsState, $totalCnt);

    // Pause before pinging (kept from the original flow; presumably
    // gives freshly signed agents time to come up).
    sleep(5);

    // run multiple attempts for pings to handle intermittent failures;
    // hosts with a recorded cert-sign failure are skipped
    $pendingNodes = array();
    foreach ($origHosts as $host) {
      if (!array_key_exists($host, $output)) {
        $pendingNodes[] = $host;
      }
    }
    $maxPingAttempts = 3;
    $retryAttempt = 0;
    do {
      $retryAttempt++;
      if ($retryAttempt > 1) {
        // keep a small sleep between retries, no sleep on first loop
        sleep(3);
      }
      $logger->log_debug("Puppet kick --ping retry attempt " . $retryAttempt
        . ", pendingHoststoCheck=" . implode(",", $pendingNodes));
      $failedNodes = array();
      foreach ($pendingNodes as $host) {
        $logger->log_debug("Pinging puppet agent for host=".$host);
        $errstr = "";
        $errno = 0;
        // Decide on ping()'s return value: fsockopen can fail while
        // leaving the error number at 0, which used to count as success.
        $reachable = ping($host, 8139, 10, $errstr, $errno);
        if ($reachable) {
          $logger->log_info("Ping to puppet agent succeeded for host [" . $host . "]");
          $hostsState[$host] = TRUE;
          unset($output[$host]);
        } else {
          $logger->log_error("Failed to ping puppet agent on host [" . $host . "]: " . $errstr);
          $failedNodes[] = $host;
          if (!isset($output[$host])) {
            // Keep the first failure's details across retries.
            $output[$host] =
              array ( "discoveryStatus" => "FAILED",
                      "badHealthReason" => "Puppet agent ping failed: error=" . $errno
                        . ", outputLogs=[" . $errstr . "]");
          }
          $hostsState[$host] = FALSE;
        }
      }
      $pendingNodes = $failedNodes;
    } while (!empty($pendingNodes) && $retryAttempt < $maxPingAttempts);
    puppet_log_host_state_counts($logger, "Puppet agent ping status", $hostsState, $totalCnt);

    // Build the per-host result.
    $response = array();
    foreach ($hostsState as $host => $state) {
      if ($state) {
        $response[$host] = array ( "discoveryStatus" => "SUCCESS",
                                   "badHealthReason" => "");
      } else if (isset($output[$host])) {
        $response[$host] = $output[$host];
      } else {
        $logger->log_error("Timed out waiting for puppet agent on host " . $host);
        $response[$host] = array ( "discoveryStatus" => "FAILED",
                                   "badHealthReason" => "Puppet cert sign timed out");
      }
    }
    $logger->log_info("Completed sign/verify puppet agent for "
      . count($response) . " nodes"
      . ", result=" . print_r($response, true));
    return $response;
  }
  /*
   * Script entry point.
   *
   * Command line arguments:
   *   1 clusterName, 2 deployUser, 3 rootTxnId, 4 mySubTxnId,
   *   5 parentSubTxnId, 6 path of the hosts file.
   *
   * The hosts file is read, every host goes through
   * sign_and_verify_agent(), per-host progress files (.out/.err/.done)
   * are written under the cluster's finalizeNodes/ directory, the hosts
   * file is rewritten with only the successful hosts, and the
   * sub-transaction status is updated in the DB.
   */
  if (!isset($argv) || count($argv) < 7) {
    // Fail fast instead of running with undefined arguments.
    print json_encode(array(
      "result" => 1,
      "error" => "Usage: finalizeNodes.php <clusterName> <deployUser> <rootTxnId>"
        . " <mySubTxnId> <parentSubTxnId> <hostsFile>"));
    return;
  }
  $clusterName = $argv[1];
  $deployUser = $argv[2];
  $rootTxnId = $argv[3];
  $mySubTxnId = $argv[4];
  $parentSubTxnId = $argv[5];
  $readFromFile = $argv[6];

  $hosts = readHostsFile($readFromFile);
  $hosts = convertToLowerCase($hosts);
  $totalHosts = count($hosts);

  $logger = new HMCLogger("PuppetFinalize:txnId="
    . $rootTxnId . ":subTxnId=" . $mySubTxnId);
  $logger->log_info("Starting signing of puppet agents certs for "
    . count($hosts) . " hosts");

  $opStatus = "STARTED";
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }

  // Create progress files for UI to track
  $operationName = "finalizeNodes";
  $clusterDir = getClusterDir($clusterName);
  $myDir = $clusterDir . $operationName . "/";
  if (is_dir($myDir)) {
    // Start from a clean directory on every run.
    rrmdir($myDir);
  }
  if (!mkdir($myDir)) {
    // Best effort, as before: keep going, but leave a trace in the log.
    $logger->log_error("Failed to create progress directory " . $myDir);
  }
  foreach ($hosts as $host) {
    // An empty <host>.out file marks the host as in progress.
    touch($myDir . "/" . $host . ".out");
  }

  $result = sign_and_verify_agent ($hosts,$logger);
  $logger->log_debug("Puppet Cert Sign Result:\n".print_r($result, true));

  // The hosts file is rewritten to contain only the hosts that succeeded.
  $nodeFileOut = fopen($readFromFile, "w");
  if ($nodeFileOut === FALSE) {
    $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
    $logger->log_error("Got error while trying to rewrite hosts file");
    return;
  }

  $updateHosts = array();
  $failedHosts = 0;
  $successfulHosts = 0;
  foreach ($result as $hostName => $hostInfo) {
    $fileName = $myDir . "/" . $hostName . ".done";
    $errFileName = $myDir . "/" . $hostName . ".err";
    $failed = ($hostInfo["discoveryStatus"] == "FAILED");

    if ($failed) {
      $updateHosts[$hostName] = $hostInfo;
      $errorString = $hostInfo["badHealthReason"];
      // One call replaces the manual fwrite loop, which could spin
      // forever if fwrite kept returning 0.
      if (file_put_contents($errFileName, $errorString) === FALSE) {
        $logger->log_error("Failed to write error file for puppet cert sign failure"
          . ", host=" . $hostName
          . ", errFile=" . $errFileName
          . ", error=" . $errorString);
      }
      $failedHosts++;
    } else {
      // write the nodename to the readFromFile file.
      fwrite($nodeFileOut, $hostName."\n");
      $successfulHosts++;
    }

    // <host>.done holds "1" on failure and "0" on success. Written
    // directly rather than through a shell "echo >" so the host-derived
    // path is never interpreted by a shell.
    if (file_put_contents($fileName, ($failed ? "1" : "0") . "\n") === FALSE) {
      $logger->log_error("Failed to write done file " . $fileName);
    }
  }
  fclose($nodeFileOut);

  $logger->log_debug("Updating DB for hosts discovery status for puppet agent cert signing");
  $ret = $dbAccessor->updateHostDiscoveryStatus($clusterName, $updateHosts);
  if ($ret["result"] != 0) {
    $logger->log_error("Failed to update DB for hosts status, error="
      . $ret["error"]);
    // TODO - handle failure?
  }

  // SUCCESS when nothing failed (or there were no hosts), TOTALFAILURE
  // when no host succeeded, FAILED for a partial failure.
  $opStatus = "SUCCESS";
  if ($totalHosts > 0) {
    if ($successfulHosts == 0) {
      $opStatus = "TOTALFAILURE";
    } else if ($failedHosts > 0) {
      $opStatus = "FAILED";
    }
  }
  $logger->log_info("Puppet finalize, succeeded for " . $successfulHosts
    . " and failed for " . $failedHosts . " of total " . $totalHosts . " hosts");

  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }
  $logger->log_info("Completed signing of certs for puppet agents, opStatus=" . $opStatus);
  338. ?>