finalizeNodes.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. <?php
  2. /*
  3. *
  4. * Licensed to the Apache Software Foundation (ASF) under one
  5. * or more contributor license agreements. See the NOTICE file
  6. * distributed with this work for additional information
  7. * regarding copyright ownership. The ASF licenses this file
  8. * to you under the Apache License, Version 2.0 (the
  9. * "License"); you may not use this file except in compliance
  10. * with the License. You may obtain a copy of the License at
  11. *
  12. * http://www.apache.org/licenses/LICENSE-2.0
  13. *
  14. * Unless required by applicable law or agreed to in writing,
  15. * software distributed under the License is distributed on an
  16. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  17. * KIND, either express or implied. See the License for the
  18. * specific language governing permissions and limitations
  19. * under the License.
  20. *
  21. */
  22. include_once '../util/Logger.php';
  23. include_once '../conf/Config.inc';
  24. include_once 'localDirs.php';
  25. include_once "../util/lock.php";
  26. include_once "../util/util.php";
  27. include_once '../db/HMCDBAccessor.php';
  28. include_once "../util/HMCTxnUtils.php";
  29. include_once 'commandUtils.php';
  // Database accessor shared by the script body at the end of this file;
  // it is opened on the path stored under the global "DB_PATH" key.
  $dbAccessor = new HMCDBAccessor(
      $GLOBALS['DB_PATH']
  );
  /**
   * Scan command output for a regular-expression pattern.
   *
   * Returns $ret as soon as any line of $output matches $pattern; when no
   * line matches, returns the opposite flag, ($ret + 1) % 2.  Callers thus
   * pass $ret = 0 together with a success pattern, or $ret = 1 together
   * with an error pattern.
   *
   * $pattern is placed between "/" delimiters unescaped, so it must be a
   * valid PCRE fragment that contains no unescaped "/".  An empty pattern
   * matches every line.
   *
   * @param array  $output  Zero-indexed list of output lines.
   * @param string $pattern Regular-expression fragment (without delimiters).
   * @param int    $ret     Value to return when the pattern is found (0 or 1).
   * @return int $ret on a match, otherwise ($ret + 1) % 2.
   */
  function check_error ($output, $pattern, $ret) {
      $notFound = ($ret + 1) % 2;
      $regex = "/$pattern/";
      $lineCount = count($output);
      $idx = 0;
      while ($idx < $lineCount) {
          if (preg_match($regex, $output[$idx])) {
              return $ret;
          }
          $idx++;
      }
      return $notFound;
  }
  /**
   * Check whether a TCP connection to $host:$port can be opened.
   *
   * The connection is closed again immediately; only reachability is tested.
   *
   * The two by-reference out-parameters now carry defaults.  Previously the
   * optional $port/$timeout preceded required parameters, which PHP 8 reports
   * as deprecated and which made the defaults unusable.  Existing five-argument
   * calls behave exactly as before.
   *
   * @param string $host    Host name or address to connect to.
   * @param int    $port    TCP port; defaults to 8139, the port the caller in
   *                        this file uses for puppet agents.
   * @param int    $timeout Connection timeout in seconds.
   * @param string $errstr  Out: error message reported by fsockopen().
   * @param int    $errno   Out: error number reported by fsockopen().  It can
   *                        be 0 even on failure, so callers must rely on the
   *                        return value rather than on $errno.
   * @return bool TRUE when the connection was established, FALSE otherwise.
   */
  function ping($host, $port = 8139, $timeout = 10, &$errstr = null, &$errno = null)
  {
      $fsock = fsockopen($host, $port, $errno, $errstr, $timeout);
      if ($fsock === FALSE) {
          return FALSE;
      }
      fclose($fsock);
      return TRUE;
  }
  /**
   * Sign the puppet certificates of the given hosts and verify that their
   * puppet agents are reachable.
   *
   * Steps:
   *  1. Hosts the puppet master already lists as signed are marked good.
   *  2. For up to 10 rounds, 3 seconds apart, every remaining host that shows
   *     up in the master's list of not-yet-signed certificates gets signed.
   *     A host whose signing command fails is recorded as FAILED and is not
   *     retried in later rounds.
   *  3. The signed list is read once more; any host found there is marked
   *     good and a previously recorded failure for it is cleared.
   *  4. Every host without a recorded failure is pinged on port 8139, with
   *     up to 3 attempts per host.
   *
   * @param array  $hosts  Host names to process (surrounding whitespace is
   *                       trimmed).
   * @param object $logger Logger providing log_debug(), log_info() and
   *                       log_error().
   * @return array Map of host name => array("discoveryStatus" => "SUCCESS" or
   *               "FAILED", "badHealthReason" => string).
   */
  function sign_and_verify_agent ($hosts, $logger) {
      // Normalise the names once, so that the state map, the patterns and the
      // result are all keyed by exactly the same string.
      foreach ($hosts as $i => $host) {
          $hosts[$i] = trim($host);
      }
      $origHosts = $hosts;
      $totalCnt = count($hosts);
      $output = array();
      $hostsState = array();
      $hostPatterns = array();
      foreach ($hosts as $host) {
          $hostsState[$host] = FALSE;
          // Match the name literally ("." is not a wildcard) and refuse matches
          // that are glued to another host-name character, so that "node1"
          // does not match a line listing "node10".
          $hostPatterns[$host] = "/(?<![A-Za-z0-9-])" . preg_quote($host, "/")
              . "(?![A-Za-z0-9-])/";
      }
      $logger->log_info("Starting sign/verify puppet agent for " . $totalCnt
          . " nodes, hosts=" . implode(",", $origHosts));

      $listCmd = "puppet cert --confdir=/etc/puppet/master list --all";
      $signedListCmd = $listCmd . " | grep \"^+ \"";
      $unsignedListCmd = $listCmd . " | grep -v \"^+ \"";

      // Step 1: drop hosts that are already signed from the work list.
      $logger->log_info("Getting puppet master list to find all signed agents");
      $signed_hosts = array();
      exec ($signedListCmd, $signed_hosts, $err);
      foreach ($signed_hosts as $signedLine) {
          foreach ($hosts as $i => $host) {
              if (preg_match ($hostPatterns[$host], $signedLine)) {
                  unset($hosts[$i]);
                  $hostsState[$host] = TRUE;
              }
          }
      }

      // Step 2: wait for the remaining agents to request a cert and sign it.
      $waitLoops = 10;
      $sleepInterval = 3;
      $logger->log_info("Looping through until all puppet agents are signed");
      for ($waitLoop = 0; $waitLoop < $waitLoops; $waitLoop++) {
          $waitSecs = $waitLoop * $sleepInterval;
          if ($waitLoop > 0) {
              $logger->log_info("Waited " . $waitSecs . " seconds for puppet cert sign"
                  . ", hostsRemaining=" . count($hosts)
                  . ", totalHosts=" . $totalCnt
                  . ", totalWaitedTimeSeconds=" . $waitSecs);
          }
          $unsigned_hosts = array();
          exec ($unsignedListCmd, $unsigned_hosts, $err);
          foreach ($hosts as $i => $host) {
              foreach ($unsigned_hosts as $unsignedLine) {
                  if (!preg_match ($hostPatterns[$host], $unsignedLine)) {
                      continue;
                  }
                  $logger->log_debug("Signing certificate for ".$host."\n");
                  $out_arr = array();
                  // The host name comes from a file; quote it for the shell.
                  $cmd = "puppet cert --confdir=/etc/puppet/master sign "
                      . escapeshellarg($host);
                  exec ($cmd, $out_arr, $retcode);
                  if ($retcode != 0) {
                      $logger->log_error("Failed to sign cert for host " . $host);
                      $output[$host] =
                          array ( "discoveryStatus" => "FAILED",
                                  "badHealthReason" => "Puppet cert sign failed: " . implode(";", $out_arr));
                      $hostsState[$host] = FALSE;
                  } else {
                      $logger->log_info("Puppet cert sign succeeded for host " . $host);
                      $hostsState[$host] = TRUE;
                      unset($output[$host]);
                  }
                  // Signed or failed, this host is done with the signing loop.
                  unset($hosts[$i]);
                  break;
              }
          }
          if (empty($hosts)) {
              break;
          }
          sleep($sleepInterval);
      }
      if ($waitLoop >= $waitLoops) {
          $logger->log_error("Timed out waiting for all puppet agents to ping master");
      }

      // Step 3: re-check if the hosts are now signed.
      $logger->log_info("Re-checking to ensure all puppet hosts are signed");
      $signed_hosts = array();
      exec ($signedListCmd, $signed_hosts, $err);
      foreach ($signed_hosts as $signedLine) {
          foreach (array_keys($hostsState) as $hostName) {
              if (preg_match ($hostPatterns[$hostName], $signedLine)) {
                  // Update the host that matched this line (the previous code
                  // updated a stale $host left over from an earlier loop).
                  $logger->log_info("Puppet cert signed for host " . $hostName);
                  $hostsState[$hostName] = TRUE;
                  unset($output[$hostName]);
              }
          }
      }

      $countFailed = 0;
      $countSucceeded = 0;
      foreach ($hostsState as $hostName => $state) {
          if ($state) {
              $countSucceeded++;
          } else {
              $countFailed++;
          }
      }
      $logger->log_info("Puppet cert sign status"
          . ", totalHosts=" . $totalCnt
          . ", succeededHostsCount=" . $countSucceeded
          . ", failedHostsCount=" . $countFailed);

      sleep(5);

      // Step 4: run multiple attempts for pings to handle intermittent
      // failures.  Hosts with a recorded signing failure are not pinged.
      $pendingNodes = array();
      foreach ($origHosts as $host) {
          if (array_key_exists ($host, $output)) {
              continue;
          }
          $pendingNodes[] = $host;
      }

      $retryAttempt = 0;
      do {
          $retryAttempt++;
          if ($retryAttempt > 1) {
              // keep a small sleep between retries, no sleep on first loop
              sleep(3);
          }
          $logger->log_debug("Puppet kick --ping retry attempt " . $retryAttempt
              . ", pendingHoststoCheck=" . implode(",", $pendingNodes));

          $failedNodes = array();
          foreach ($pendingNodes as $host) {
              $logger->log_debug("Pinging puppet agent for host=".$host);
              $errstr = "";
              $errno = 0;
              // Trust the return value: fsockopen() may fail while leaving
              // errno at 0 (failure before connect()), so errno alone cannot
              // tell success from failure.
              $reachable = ping($host, 8139, 10, $errstr, $errno);

              if ($reachable) {
                  $logger->log_info("Ping to puppet agent succeeded for host [" . $host . "]");
                  $hostsState[$host] = TRUE;
                  unset($output[$host]);
                  continue;
              }

              $logger->log_error("Failed to ping puppet agent on host [" . $host . "]: " . $errstr);
              $failedNodes[] = $host;
              $errorLogs = "Puppet agent ping failed: [" . $errstr . "]";
              if (!isset($output[$host])) {
                  $output[$host] =
                      array ( "discoveryStatus" => "FAILED",
                              "badHealthReason" => "Puppet agent ping failed: "
                                  . ", error=" . $errno
                                  . ", outputLogs=" . $errorLogs);
              }
              $hostsState[$host] = FALSE;
          }
          $pendingNodes = $failedNodes;
      } while (!empty($pendingNodes) && $retryAttempt < 3);

      $countFailed = 0;
      $countSucceeded = 0;
      foreach ($hostsState as $hostName => $state) {
          if ($state) {
              $countSucceeded++;
          } else {
              $countFailed++;
          }
      }
      $logger->log_info("Puppet agent ping status"
          . ", totalHosts=" . $totalCnt
          . ", succeededHostsCount=" . $countSucceeded
          . ", failedHostsCount=" . $countFailed);

      // Build the per-host result.  A failed host without a recorded reason
      // never showed up in any certificate list, i.e. it timed out.
      $response = array();
      foreach ($hostsState as $host => $state) {
          if ($state) {
              $response[$host] = array ( "discoveryStatus" => "SUCCESS",
                                         "badHealthReason" => "");
          } else if (isset($output[$host])) {
              $response[$host] = $output[$host];
          } else {
              $logger->log_error("Timed out waiting for puppet agent on host " . $host);
              $response[$host] = array ( "discoveryStatus" => "FAILED",
                                         "badHealthReason" => "Puppet cert sign timed out");
          }
      }
      $logger->log_info("Completed sign/verify puppet agent for "
          . count($response) . " nodes"
          . ", result=" . print_r($response, true));
      return $response;
  }
  // ---------------------------------------------------------------------------
  // Script body.
  //
  // Command-line arguments (all required):
  //   1: cluster name             2: deploy user
  //   3: root transaction id      4: this sub-transaction id
  //   5: parent sub-transaction id
  //   6: path of the hosts file; it is read here and later rewritten with only
  //      the hosts that were finalized successfully.
  // ---------------------------------------------------------------------------
  if (!isset($argv) || count($argv) < 7) {
      error_log("finalizeNodes.php: expected 6 arguments: clusterName deployUser"
          . " rootTxnId mySubTxnId parentSubTxnId hostsFile");
      exit(1);
  }
  $clusterName = $argv[1];
  $deployUser = $argv[2];
  $rootTxnId = $argv[3];
  $mySubTxnId = $argv[4];
  $parentSubTxnId = $argv[5];
  $readFromFile = $argv[6];

  $hosts = readHostsFile($readFromFile);
  $hosts = convertToLowerCase($hosts);
  $totalHosts = count($hosts);

  $logger = new HMCLogger("PuppetFinalize:txnId="
      . $rootTxnId . ":subTxnId=" . $mySubTxnId);
  $logger->log_info("Starting signing of puppet agents certs for "
      . count($hosts) . " hosts");

  $opStatus = "STARTED";
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
      $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
      print json_encode($subTransactionReturnValue);
      return;
  }

  // Create progress files for UI to track: one empty "<host>.out" per host in
  // a freshly recreated per-operation directory.
  $operationName = "finalizeNodes";
  $clusterDir = getClusterDir($clusterName);
  $myDir = $clusterDir . $operationName . "/";
  if (is_dir($myDir)) {
      rrmdir($myDir);
  }
  if (!mkdir($myDir)) {
      // Best effort: progress files are only used for tracking, so carry on.
      $logger->log_error("Failed to create progress directory " . $myDir);
  }
  foreach ($hosts as $host) {
      $fileName = $myDir . "/" . $host . ".out";
      $h = fopen($fileName, "a");
      if ($h !== FALSE) {
          fclose($h);
      }
  }

  $result = sign_and_verify_agent ($hosts,$logger);
  $logger->log_debug("Puppet Cert Sign Result:\n".print_r($result, true));

  // Rewrite the hosts file so that it only lists the successful hosts.
  $nodeFileOut = fopen($readFromFile, "w");
  if ($nodeFileOut === FALSE) {
      $logger->log_error("Got error while trying to rewrite hosts file");
      $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
      if ($subTransactionReturnValue["result"] != 0 ) {
          $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
      }
      return;
  }

  $updateHosts = array();
  $failedHosts = 0;
  $successfulHosts = 0;
  foreach ($result as $hostName => $hostInfo) {
      // "<host>.done" holds "1" for a failed host and "0" for a successful
      // one; "<host>.err" holds the failure reason.
      $fileName = $myDir . "/" . $hostName . ".done";
      $errFileName = $myDir . "/" . $hostName . ".err";
      $hostFailed = ($hostInfo["discoveryStatus"] == "FAILED");

      if ($hostFailed) {
          $updateHosts[$hostName] = $hostInfo;
          $errorString = $hostInfo["badHealthReason"];
          if (file_put_contents($errFileName, $errorString) === FALSE) {
              $logger->log_error("Failed to write error file for puppet cert sign failure"
                  . ", host=" . $hostName
                  . ", errFile=" . $errFileName
                  . ", error=" . $errorString);
          }
          $failedHosts++;
      } else {
          // write the nodename to the readFromFile file.
          fwrite($nodeFileOut, $hostName."\n");
          $successfulHosts++;
      }

      // Written directly instead of through a shell "echo ... > file", which
      // passed the unescaped file name to the shell.  The content is identical.
      $doneMarker = $hostFailed ? "1\n" : "0\n";
      if (file_put_contents($fileName, $doneMarker) === FALSE) {
          $logger->log_error("Failed to write done file " . $fileName
              . " for host " . $hostName);
      }
  }
  fclose($nodeFileOut);

  $logger->log_debug("Updating DB for hosts discovery status for puppet agent cert signing");
  $ret = $dbAccessor->updateHostDiscoveryStatus($clusterName, $updateHosts);
  if ($ret["result"] != 0) {
      $logger->log_error("Failed to update DB for hosts status, error="
          . $ret["error"]);
      // TODO - handle failure?
  }

  // Overall status: SUCCESS when every host succeeded (or there were none),
  // TOTALFAILURE when none succeeded, FAILED when only some did.
  $opStatus = "SUCCESS";
  if ($totalHosts > 0) {
      if ($successfulHosts == 0) {
          $opStatus = "TOTALFAILURE";
      } else if ($failedHosts > 0) {
          $opStatus = "FAILED";
      }
  }
  $logger->log_info("Puppet finalize, succeeded for " . $successfulHosts
      . " and failed for " . $failedHosts . " of total " . $totalHosts . " hosts");

  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);
  if ($subTransactionReturnValue["result"] != 0 ) {
      $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
      print json_encode($subTransactionReturnValue);
      return;
  }
  $logger->log_info("Completed signing of certs for puppet agents, opStatus=" . $opStatus);
  358. ?>