finalizeNodes.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  <?php
  // Project helpers this script relies on: logging, configuration, local
  // directory layout, locking, generic utilities, DB access, transaction
  // helpers and command helpers.
  include_once '../util/Logger.php';
  include_once '../conf/Config.inc';
  include_once 'localDirs.php';
  include_once '../util/lock.php';
  include_once '../util/util.php';
  include_once '../db/HMCDBAccessor.php';
  include_once '../util/HMCTxnUtils.php';
  include_once 'commandUtils.php';

  // Script-wide database accessor, opened on the path stored in the DB_PATH global.
  $dbAccessor = new HMCDBAccessor($GLOBALS['DB_PATH']);
  /*
   * Scan command output for a pattern.
   *
   * $output  - array of output lines (for example as filled in by exec()).
   * $pattern - regular expression body; it is placed between "/" delimiters
   *            as-is, so the caller must escape any "/" it contains.
   * $ret     - value to return when some line matches:
   *            pass 0 when $pattern is a success pattern,
   *            pass 1 when $pattern is an error pattern.
   *
   * Returns $ret if any line matches, otherwise the opposite value
   * (($ret + 1) % 2). An empty pattern matches every line, so $ret is
   * returned whenever $output is non-empty.
   */
  function check_error ($output, $pattern, $ret) {
      // Iterate over values rather than indexes 0..count-1 so that arrays
      // with gaps or non-numeric keys are scanned completely.
      foreach ($output as $line) {
          if (preg_match ("/$pattern/", $line)) {
              return $ret;
          }
      }
      return ($ret + 1) % 2;
  }
  /*
   * Returns TRUE when a line of "puppet cert list" output refers to $host.
   *
   * The host name is matched literally (regex metacharacters such as "." are
   * escaped). It must not be the tail of a longer name and must not be
   * followed by another name character, so "node1" does not match a line
   * for "node10" or "xnode1". A following "." is tolerated, so a short host
   * name still matches a line that carries its fully qualified form.
   */
  function cert_line_names_host ($host, $line) {
      $pattern = '/(?<![A-Za-z0-9_.-])' . preg_quote($host, '/') . '(?![A-Za-z0-9_-])/';
      return preg_match($pattern, $line) === 1;
  }

  /*
   * Sign and verify puppet agents.
   *
   * Steps:
   *   1. Mark hosts that already appear as signed ("+ " lines) in
   *      "puppet cert list --all".
   *   2. Poll up to $waitLoops times, $sleepInterval seconds apart, signing
   *      every remaining host that shows up in the non-signed listing.
   *   3. Re-read the signed listing and mark every host found there.
   *   4. Wait $settleSeconds, then run "puppet kick --ping" against every
   *      host whose signing did not fail.
   *
   * $hosts         - array of host names; surrounding whitespace is trimmed.
   * $logger        - object providing log_info(), log_error(), log_debug().
   * $waitLoops     - maximum number of polling rounds (default 10).
   * $sleepInterval - seconds to sleep between polling rounds (default 3).
   * $settleSeconds - seconds to wait before the ping check (default 5).
   *
   * Returns an array keyed by (trimmed) host name; each value is
   * array("discoveryStatus" => "SUCCESS"|"FAILED", "badHealthReason" => text).
   */
  function sign_and_verify_agent ($hosts, $logger, $waitLoops = 10, $sleepInterval = 3, $settleSeconds = 5) {
      $certCmd = "puppet cert --confdir=/etc/puppet/master";

      // Normalise once so every lookup below uses the same key for a host.
      $hosts = array_map('trim', $hosts);
      $origHosts = $hosts;
      $totalCnt = count($hosts);

      $output = array();      // host => failure details
      $hostsState = array();  // host => TRUE once signed / reachable
      foreach ($hosts as $host) {
          $hostsState[$host] = FALSE;
      }

      // Step 1: drop hosts that are already signed from the pending list.
      $signed_hosts = array();
      $logger->log_info("Getting puppet master list to find all signed agents");
      exec ($certCmd . " list --all | grep \"^+ \"", $signed_hosts, $err);
      foreach ($signed_hosts as $line) {
          foreach ($hosts as $i => $host) {
              if (cert_line_names_host($host, $line)) {
                  unset($hosts[$i]);
                  $hostsState[$host] = TRUE;
              }
          }
      }

      // Step 2: poll for pending certificate requests and sign them.
      $logger->log_info("Looping through until all puppet agents are signed");
      for ($waitLoop = 0; $waitLoop < $waitLoops; $waitLoop++) {
          $waitSecs = $waitLoop * $sleepInterval;
          if ($waitLoop > 0) {
              $logger->log_info("Waited " . $waitSecs . " seconds for puppet cert sign"
                  . ", hostsRemaining=" . count($hosts)
                  . ", totalHosts=" . $totalCnt
                  . ", totalWaitedTimeSeconds=" . $waitSecs);
          }

          $unsigned_hosts = array();
          exec ($certCmd . " list --all | grep -v \"^+ \"", $unsigned_hosts, $err);

          foreach ($hosts as $i => $host) {
              foreach ($unsigned_hosts as $line) {
                  if (!cert_line_names_host($host, $line)) {
                      continue;
                  }
                  $logger->log_debug("Signing certificate for ".$host."\n");
                  $out_arr = array();
                  // Quote the host name so it cannot alter the shell command.
                  exec ($certCmd . " sign " . escapeshellarg($host), $out_arr, $retcode);
                  if ($retcode != 0) {
                      $logger->log_error("Failed to sign cert for host " . $host);
                      $output[$host] =
                          array ( "discoveryStatus" => "FAILED",
                                  "badHealthReason" => "Puppet cert sign failed: " . implode(";", $out_arr));
                      $hostsState[$host] = FALSE;
                  } else {
                      $logger->log_info("Puppet cert sign succeeded for host " . $host);
                      $hostsState[$host] = TRUE;
                      unset($output[$host]);
                  }
                  // One signing attempt per host, whatever its outcome.
                  unset($hosts[$i]);
                  break;
              }
          }

          if (empty($hosts)) {
              break;
          }
          sleep($sleepInterval);
      }
      // Hosts still pending here never produced a certificate request.
      if (!empty($hosts)) {
          $logger->log_error("Timed out waiting for all puppet agents to ping master");
      }

      // Step 3: re-check if the hosts are now signed.
      $logger->log_info("Re-checking to ensure all puppet hosts are signed");
      $signed_hosts = array();
      exec ($certCmd . " list --all | grep \"^+ \"", $signed_hosts, $err);
      foreach ($signed_hosts as $line) {
          foreach ($hostsState as $hostName => $state) {
              if (cert_line_names_host($hostName, $line)) {
                  $logger->log_info("Puppet cert signed for host " . $hostName);
                  $hostsState[$hostName] = TRUE;
                  unset($output[$hostName]);
              }
          }
      }

      $countSucceeded = count(array_filter($hostsState));
      $countFailed = count($hostsState) - $countSucceeded;
      $logger->log_info("Puppet cert sign status"
          . ", totalHosts=" . $totalCnt
          . ", succeededHostsCount=" . $countSucceeded
          . ", failedHostsCount=" . $countFailed);

      sleep($settleSeconds);

      // Step 4: ping every host that has no recorded signing failure.
      foreach ($origHosts as $host) {
          if (array_key_exists ($host , $output)) {
              continue;
          }
          /* Give puppet kick --ping to check if agent is working */
          $logger->log_debug("Puppet kick --ping for ".$host."\n");
          $out_arr = array();
          $cmd = "puppet kick -f --host " . escapeshellarg($host) . " --ping 2>/dev/null";
          exec ($cmd, $out_arr, $err);
          if ($err == 0 && check_error($out_arr, "status is success", 0) == 0) {
              $logger->log_info("Puppet kick succeeded for host " . $host);
              $hostsState[$host] = TRUE;
          } else {
              $logger->log_error("Failed to do puppet kick -ping on host " . $host);
              $output[$host] =
                  array ( "discoveryStatus" => "FAILED",
                          "badHealthReason" => "Puppet kick failed: "
                              . ", error=" . $err . ", outputLogs="
                              . implode(";", $out_arr));
              $hostsState[$host] = FALSE;
          }
      }

      $countSucceeded = count(array_filter($hostsState));
      $countFailed = count($hostsState) - $countSucceeded;
      $logger->log_info("Puppet kick status"
          . ", totalHosts=" . $totalCnt
          . ", succeededHostsCount=" . $countSucceeded
          . ", failedHostsCount=" . $countFailed);

      // Build the per-host response.
      $response = array();
      foreach ($hostsState as $host => $state) {
          if ($state) {
              $response[$host] = array ( "discoveryStatus" => "SUCCESS",
                                         "badHealthReason" => "");
          } else if (isset($output[$host])) {
              $response[$host] = $output[$host];
          } else {
              $logger->log_error("Timed out waiting for puppet agent on host " . $host);
              $response[$host] = array ( "discoveryStatus" => "FAILED",
                                         "badHealthReason" => "Puppet cert sign timed out");
          }
      }
      return $response;
  }
  // Positional command-line arguments: cluster name, deploy user, root
  // transaction id, this sub-transaction id, parent sub-transaction id and
  // the path of the hosts file.
  $clusterName = $argv[1];
  $deployUser = $argv[2];
  $rootTxnId = $argv[3];
  $mySubTxnId = $argv[4];
  $parentSubTxnId = $argv[5];
  $readFromFile = $argv[6];

  $hosts = readHostsFile($readFromFile);
  $hosts = convertToLowerCase($hosts);
  $totalHosts = count($hosts);

  $logger = new HMCLogger("PuppetFinalize:txnId="
      . $rootTxnId . ":subTxnId=" . $mySubTxnId);
  $logger->log_info("Starting signing of puppet agents certs for "
      . count($hosts) . " hosts");

  // Record that this sub-transaction has started; stop if the DB rejects it.
  $opStatus = "STARTED";
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
      $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
      print json_encode($subTransactionReturnValue);
      return;
  }

  // Create progress files for UI to track. $myDir always ends with "/".
  $operationName = "finalizeNodes";
  $clusterDir = getClusterDir($clusterName);
  $myDir = $clusterDir . $operationName . "/";
  if (is_dir($myDir)) {
      rrmdir($myDir);
  }
  if (!mkdir($myDir)) {
      // Progress files are best-effort; keep going but leave a trace.
      $logger->log_error("Failed to create progress directory " . $myDir);
  }
  foreach ($hosts as $host) {
      // Create an empty <host>.out file.
      $h = fopen($myDir . $host . ".out", "a");
      if ($h !== FALSE) {
          fclose($h);
      }
  }

  $result = sign_and_verify_agent ($hosts,$logger);
  $logger->log_debug("Puppet Cert Sign Result:\n".print_r($result, true));

  // The hosts file is rewritten to contain only the hosts that succeeded.
  $nodeFileOut = fopen($readFromFile, "w");
  if ($nodeFileOut === FALSE) {
      $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
      $logger->log_error("Got error while trying to rewrite hosts file");
      return;
  }

  $updateHosts = array();
  $failedHosts = 0;
  $successfulHosts = 0;
  foreach ($result as $hostName => $hostInfo) {
      $fileName = $myDir . $hostName . ".done";
      $errFileName = $myDir . $hostName . ".err";

      if ($hostInfo["discoveryStatus"] == "FAILED") {
          $updateHosts[$hostName] = $hostInfo;
          $errorString = $hostInfo["badHealthReason"];
          // Write the failure reason to <host>.err in one call; this cannot
          // spin the way a manual fwrite loop can when no bytes are written.
          if (file_put_contents($errFileName, $errorString) === FALSE) {
              $logger->log_error("Failed to write error file for puppet cert sign failure"
                  . ", host=" . $hostName
                  . ", errFile=" . $errFileName
                  . ", error=" . $errorString);
          }
          // <host>.done holds "1" for failure, "0" for success. Written
          // directly rather than through a shell so the host name is never
          // interpreted as shell syntax.
          file_put_contents($fileName, "1\n");
          $failedHosts++;
      } else {
          file_put_contents($fileName, "0\n");
          // write the nodename to the readFromFile file.
          fwrite($nodeFileOut, $hostName."\n");
          $successfulHosts++;
      }
  }
  fclose($nodeFileOut);

  $logger->log_debug("Updating DB for hosts discovery status for puppet agent cert signing");
  $ret = $dbAccessor->updateHostDiscoveryStatus($clusterName, $updateHosts);
  if ($ret["result"] != 0) {
      $logger->log_error("Failed to update DB for hosts status, error="
          . $ret["error"]);
      // TODO - handle failure?
  }

  // Overall status: SUCCESS when nothing failed (or there were no hosts),
  // TOTALFAILURE when no host succeeded, FAILED when only some did.
  $opStatus = "SUCCESS";
  if ($totalHosts > 0) {
      if ($successfulHosts == 0) {
          $opStatus = "TOTALFAILURE";
      } else if ($failedHosts > 0) {
          $opStatus = "FAILED";
      }
  }
  $logger->log_info("Puppet finalize, succeeded for " . $successfulHosts
      . " and failed for " . $failedHosts . " of total " . $totalHosts . " hosts");

  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
      $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
      print json_encode($subTransactionReturnValue);
      return;
  }
  $logger->log_info("Completed signing of certs for puppet agents, opStatus=" . $opStatus);
  ?>