finalizeNodes.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. <?php
  2. include_once '../util/Logger.php';
  3. include_once '../conf/Config.inc';
  4. include_once 'localDirs.php';
  5. include_once "../util/lock.php";
  6. include_once "../util/util.php";
  7. include_once '../db/HMCDBAccessor.php';
  8. include_once "../util/HMCTxnUtils.php";
  9. include_once 'commandUtils.php';
  // Script-wide database accessor, opened on the path stored in the DB_PATH global.
  $dbAccessor = new HMCDBAccessor($GLOBALS['DB_PATH']);
  /**
   * Scan command output for a pattern and map the outcome to a status code.
   *
   * Every string entry of $output is tested against $pattern. On the first
   * match the function returns $ret; if nothing matches it returns the
   * opposite status, computed as ($ret + 1) % 2:
   *   - success pattern: pass $ret = 0, a miss yields 1
   *   - error pattern:   pass $ret = 1, a miss yields 0
   *
   * An empty $pattern matches every string, so any output containing at least
   * one string entry yields $ret.
   *
   * @param array|string $output  Output lines; a bare string is treated as one
   *                              line. Non-string entries are skipped.
   * @param string       $pattern PCRE fragment placed between "/" delimiters;
   *                              a literal "/" must already be escaped.
   * @param int          $ret     Status to return when the pattern is found.
   * @return int $ret on a match, ($ret + 1) % 2 otherwise.
   */
  function check_error ($output, $pattern, $ret) {
    $noMatchRet = ($ret + 1) % 2;
    // Iterate by value instead of by 0..count-1 index so that sparse or
    // string-keyed arrays are scanned completely.
    foreach ((array) $output as $line) {
      if (!is_string($line)) {
        continue;
      }
      if (preg_match("/$pattern/", $line)) {
        return $ret;
      }
    }
    return $noMatchRet;
  }
  /**
   * Check whether a TCP connection to $host:$port can be opened.
   *
   * The connection is closed again immediately; only reachability is tested.
   *
   * @param string $host    Host name or address to connect to.
   * @param int    $port    TCP port (default 8139).
   * @param int    $timeout Connection timeout in seconds (default 10).
   * @param string $errstr  Receives the error message reported by fsockopen().
   * @param int    $errno   Receives the error number reported by fsockopen().
   * @return bool TRUE if the connection was opened, FALSE otherwise.
   */
  function ping($host, $port = 8139, $timeout = 10, &$errstr = null, &$errno = null)
  {
    // The by-reference parameters carry defaults so that no required parameter
    // follows an optional one.
    $fsock = fsockopen($host, $port, $errno, $errstr, $timeout);
    if ($fsock === FALSE) {
      return FALSE;
    }
    // Release the socket; it was only opened as a reachability probe.
    fclose($fsock);
    return TRUE;
  }
  /**
   * Sign the puppet certificates of the given hosts and verify their agents.
   *
   * Steps:
   *   1. List the certificates the puppet master has already signed and mark
   *      the matching hosts as done.
   *   2. Poll the list of unsigned certificates (up to 10 rounds, 3 seconds
   *      apart) and sign each requested host as soon as its request appears.
   *   3. Re-read the signed list and mark every host found there as signed.
   *   4. Open a TCP connection to port 8139 on every host without a recorded
   *      signing failure, retrying failed hosts up to 3 attempts in total.
   *
   * @param array  $hosts  Host names; surrounding whitespace is trimmed.
   * @param object $logger Logger exposing log_info(), log_debug(), log_error().
   * @return array Map of host name => array("discoveryStatus" => "SUCCESS" or
   *               "FAILED", "badHealthReason" => string).
   */
  function sign_and_verify_agent ($hosts, $logger) {
    // Trim once up front so every map below is keyed by the same spelling.
    $hosts = array_map('trim', $hosts);
    $origHosts = $hosts;
    $totalCnt = count($hosts);
    $output = array();
    $hostsState = array();
    foreach ($hosts as $host) {
      $hostsState[$host] = FALSE;
    }
    $logger->log_info("Starting sign/verify puppet agent for " . $totalCnt
        . " nodes, hosts=" . implode(",", $origHosts));

    // Step 1: hosts whose certificate is already signed need no signing.
    $signed_hosts = array();
    $logger->log_info("Getting puppet master list to find all signed agents");
    $cmd = "puppet cert --confdir=/etc/puppet/master list --all | grep \"^+ \"";
    exec ($cmd, $signed_hosts, $err);
    foreach ($signed_hosts as $signedLine) {
      foreach ($hosts as $i => $host) {
        // preg_quote keeps "." and other metacharacters in the name literal.
        if (preg_match ("/" . preg_quote($host, "/") . "/", $signedLine)) {
          unset($hosts[$i]);
          $hostsState[$host] = TRUE;
        }
      }
    }

    // Step 2: wait for the remaining hosts to request a certificate, sign it.
    $waitLoops = 10;
    $sleepInterval = 3;
    $logger->log_info("Looping through until all puppet agents are signed");
    for ($waitLoop = 0; $waitLoop < $waitLoops; $waitLoop++) {
      $waitSecs = $waitLoop * $sleepInterval;
      if ($waitLoop > 0) {
        $logger->log_info("Waited " . $waitSecs . " seconds for puppet cert sign"
            . ", hostsRemaining=" . count($hosts)
            . ", totalHosts=" . $totalCnt
            . ", totalWaitedTimeSeconds=" . $waitSecs);
      }
      $unsigned_hosts = array();
      $cmd = "puppet cert --confdir=/etc/puppet/master list --all | grep -v \"^+ \"";
      exec ($cmd, $unsigned_hosts, $err);
      foreach ($hosts as $i => $host) {
        $hostPattern = "/" . preg_quote($host, "/") . "/";
        foreach ($unsigned_hosts as $unsignedLine) {
          if (!preg_match ($hostPattern, $unsignedLine)) {
            continue;
          }
          $logger->log_debug("Signing certificate for ".$host."\n");
          $out_arr = array();
          // The host name comes from a file, so quote it for the shell.
          $cmd = "puppet cert --confdir=/etc/puppet/master sign " . escapeshellarg($host);
          exec ($cmd, $out_arr, $retcode);
          if ($retcode != 0) {
            $logger->log_error("Failed to sign cert for host " . $host);
            $output[$host] =
              array ( "discoveryStatus" => "FAILED",
                      "badHealthReason" => "Puppet cert sign failed: " . implode(";", $out_arr));
            $hostsState[$host] = FALSE;
          } else {
            $logger->log_info("Puppet cert sign succeeded for host " . $host);
            $hostsState[$host] = TRUE;
            unset($output[$host]);
          }
          // A host gets one signing attempt, whether it succeeded or not.
          unset($hosts[$i]);
          break;
        }
      }
      if (empty($hosts)) {
        break;
      }
      sleep($sleepInterval);
    }
    // The loop only runs to completion when some host never showed up.
    if ($waitLoop >= $waitLoops) {
      $logger->log_error("Timed out waiting for all puppet agents to ping master");
    }

    // Step 3: re-check if the hosts are now signed
    $logger->log_info("Re-checking to ensure all puppet hosts are signed");
    $signed_hosts = array();
    $cmd = "puppet cert --confdir=/etc/puppet/master list --all | grep \"^+ \"";
    exec ($cmd, $signed_hosts, $err);
    foreach ($signed_hosts as $signedLine) {
      foreach ($hostsState as $hostName => $state) {
        if (preg_match ("/" . preg_quote((string) $hostName, "/") . "/", $signedLine)) {
          // Update the host that matched this line, not a leftover loop
          // variable from the signing phase.
          $logger->log_info("Puppet cert signed for host " . $hostName);
          $hostsState[$hostName] = TRUE;
          unset($output[$hostName]);
        }
      }
    }
    $countSucceeded = count(array_filter($hostsState));
    $countFailed = count($hostsState) - $countSucceeded;
    $logger->log_info("Puppet cert sign status"
        . ", totalHosts=" . $totalCnt
        . ", succeededHostsCount=" . $countSucceeded
        . ", failedHostsCount=" . $countFailed);
    sleep(5);

    // Step 4: run multiple attempts for pings to handle intermittent failures.
    // Hosts with a recorded signing failure are not pinged.
    $pendingNodes = array();
    foreach ($origHosts as $host) {
      if (array_key_exists ($host, $output)) {
        continue;
      }
      $pendingNodes[] = $host;
    }
    $maxPingAttempts = 3;
    $retryAttempt = 0;
    do {
      $retryAttempt++;
      if ($retryAttempt > 1) {
        // keep a small sleep between retries, no sleep on first loop
        sleep(3);
      }
      $logger->log_debug("Puppet kick --ping retry attempt " . $retryAttempt
          . ", pendingHoststoCheck=" . implode(",", $pendingNodes));
      $failedNodes = array();
      $pHostOutput = array();
      $pHostResponse = array();
      $pHostReachable = array();
      foreach ($pendingNodes as $host) {
        /* Give ping agent check if it is working */
        $logger->log_debug("Pinging puppet agent for host=".$host);
        $errstr = "";
        $errno = 0;
        // Keep the return value: fsockopen() can fail before connect() and
        // leave the error number at 0, so $errno alone cannot signal success.
        $pHostReachable[$host] = ping($host, 8139, 10, $errstr, $errno);
        $pHostOutput[$host] = $errstr;
        $pHostResponse[$host] = $errno;
      }
      foreach ($pendingNodes as $host) {
        if ($pHostReachable[$host]) {
          $logger->log_info("Ping to puppet agent succeeded for host [" . $host . "]");
          $hostsState[$host] = TRUE;
          unset($output[$host]);
        } else {
          $logger->log_error("Failed to ping puppet agent on host [" . $host . "]: " . $pHostOutput[$host]);
          $failedNodes[] = $host;
          $errorCode = $pHostResponse[$host];
          $errorLogs = "Puppet agent ping failed: [" . $pHostOutput[$host] . "]";
          if (!isset($output[$host])) {
            $output[$host] =
              array ( "discoveryStatus" => "FAILED",
                      "badHealthReason" => "Puppet agent ping failed: "
                        . ", error=" . $errorCode
                        . ", outputLogs=" . $errorLogs);
          }
          $hostsState[$host] = FALSE;
        }
      }
      $pendingNodes = $failedNodes;
    } while (!empty($pendingNodes) && $retryAttempt < $maxPingAttempts);

    $countSucceeded = count(array_filter($hostsState));
    $countFailed = count($hostsState) - $countSucceeded;
    $logger->log_info("Puppet agent ping status"
        . ", totalHosts=" . $totalCnt
        . ", succeededHostsCount=" . $countSucceeded
        . ", failedHostsCount=" . $countFailed);

    // Build the per-host result; a failed host without a recorded reason is
    // reported as a signing timeout.
    $response = array();
    foreach ($hostsState as $host => $state) {
      if ($state) {
        $response[$host] = array ( "discoveryStatus" => "SUCCESS",
                                   "badHealthReason" => "");
      } else if (isset($output[$host])) {
        $response[$host] = $output[$host];
      } else {
        $logger->log_error("Timed out waiting for puppet agent on host " . $host);
        $response[$host] = array ( "discoveryStatus" => "FAILED",
                                   "badHealthReason" => "Puppet cert sign timed out");
      }
    }
    $logger->log_info("Completed sign/verify puppet agent for "
        . count($response) . " nodes"
        . ", result=" . print_r($response, true));
    return $response;
  }
  // Command line: finalizeNodes.php <clusterName> <deployUser> <rootTxnId>
  //               <mySubTxnId> <parentSubTxnId> <hostsFile>
  // Stop early when arguments are missing instead of continuing with
  // undefined values.
  if (!isset($argv) || count($argv) < 7) {
    $logger = new HMCLogger("PuppetFinalize");
    $logger->log_error("Missing arguments, expected: clusterName deployUser"
        . " rootTxnId mySubTxnId parentSubTxnId hostsFile");
    return;
  }
  $clusterName = $argv[1];
  $deployUser = $argv[2];
  $rootTxnId = $argv[3];
  $mySubTxnId = $argv[4];
  $parentSubTxnId = $argv[5];
  $readFromFile = $argv[6];

  // Host names are read from the given file and lower-cased before use.
  $hosts = readHostsFile($readFromFile);
  $hosts = convertToLowerCase($hosts);
  $totalHosts = count($hosts);

  $logger = new HMCLogger("PuppetFinalize:txnId="
      . $rootTxnId . ":subTxnId=" . $mySubTxnId);
  $logger->log_info("Starting signing of puppet agents certs for "
      . $totalHosts . " hosts");

  // Record that this sub-transaction has started; stop if that fails.
  $opStatus = "STARTED";
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }
  // Create progress files for UI to track: one empty "<host>.out" file per
  // host inside a freshly created "<clusterDir>finalizeNodes/" directory.
  $operationName = "finalizeNodes";
  $clusterDir = getClusterDir($clusterName);
  $myDir = $clusterDir . $operationName . "/";
  if (is_dir($myDir)) {
    rrmdir($myDir);
  }
  // Progress tracking is best-effort: report failures but keep going.
  if (!mkdir($myDir) && !is_dir($myDir)) {
    $logger->log_error("Failed to create progress directory " . $myDir);
  }
  foreach ($hosts as $host) {
    $fileName = $myDir . "/" . $host . ".out";
    $h = fopen($fileName, "a");
    if ($h !== FALSE) {
      fclose($h);
    } else {
      $logger->log_error("Failed to create progress file " . $fileName);
    }
  }
  // Sign and verify all hosts, then persist the outcome:
  //   - the hosts file is rewritten to contain only the successful hosts
  //   - "<host>.done" holds 0 (success) or 1 (failure)
  //   - "<host>.err" holds the failure reason for failed hosts
  $result = sign_and_verify_agent ($hosts,$logger);
  $logger->log_debug("Puppet Cert Sign Result:\n".print_r($result, true));
  $nodeFileOut = fopen($readFromFile, "w");
  if ($nodeFileOut === FALSE) {
    $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
    $logger->log_error("Got error while trying to rewrite hosts file");
    return;
  }
  $updateHosts = array();
  $failedHosts = 0;
  $successfulHosts = 0;
  foreach ($result as $hostName => $hostInfo) {
    $fileName = $myDir . "/" . $hostName . ".done";
    $errFileName = $myDir . "/" . $hostName . ".err";
    if ($hostInfo["discoveryStatus"] == "FAILED") {
      $updateHosts[$hostName] = $hostInfo;
      $errorString = $hostInfo["badHealthReason"];
      // file_put_contents writes the whole string or reports failure, so no
      // manual partial-write loop is needed.
      if (file_put_contents($errFileName, $errorString) === FALSE) {
        $logger->log_error("Failed to write error file for puppet cert sign failure"
            . ", host=" . $hostName
            . ", errFile=" . $errFileName
            . ", error=" . $errorString);
      }
      $doneMarker = "1\n";
      $failedHosts++;
    } else {
      $doneMarker = "0\n";
      // write the nodename to the readFromFile file.
      fwrite($nodeFileOut, $hostName."\n");
      $successfulHosts++;
    }
    // Write the marker directly instead of through a shell "echo" redirect,
    // so the host-derived path is never interpreted by a shell.
    if (file_put_contents($fileName, $doneMarker) === FALSE) {
      $logger->log_error("Failed to write done file " . $fileName);
    }
  }
  fclose($nodeFileOut);
  // Persist the discovery status of the failed hosts.
  $logger->log_debug("Updating DB for hosts discovery status for puppet agent cert signing");
  $ret = $dbAccessor->updateHostDiscoveryStatus($clusterName, $updateHosts);
  if ($ret["result"] != 0) {
    // TODO - handle failure?
    $logger->log_error("Failed to update DB for hosts status, error="
        . $ret["error"]);
  }

  // Overall status: SUCCESS when there were no hosts or none failed,
  // TOTALFAILURE when no host succeeded, FAILED when only some failed.
  if (!($totalHosts > 0)) {
    $opStatus = "SUCCESS";
  } else if ($successfulHosts == 0) {
    $opStatus = "TOTALFAILURE";
  } else if ($failedHosts > 0) {
    $opStatus = "FAILED";
  } else {
    $opStatus = "SUCCESS";
  }
  $logger->log_info("Puppet finalize, succeeded for " . $successfulHosts
      . " and failed for " . $failedHosts . " of total " . $totalHosts . " hosts");

  // Record the final status of this sub-transaction.
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus(
      $clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }
  $logger->log_info("Completed signing of certs for puppet agents, opStatus=" . $opStatus);
  337. ?>