verifyAndUpdateNodesInfo.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. <?php
  2. /*
  3. *
  4. * Licensed to the Apache Software Foundation (ASF) under one
  5. * or more contributor license agreements. See the NOTICE file
  6. * distributed with this work for additional information
  7. * regarding copyright ownership. The ASF licenses this file
  8. * to you under the Apache License, Version 2.0 (the
  9. * "License"); you may not use this file except in compliance
  10. * with the License. You may obtain a copy of the License at
  11. *
  12. * http://www.apache.org/licenses/LICENSE-2.0
  13. *
  14. * Unless required by applicable law or agreed to in writing,
  15. * software distributed under the License is distributed on an
  16. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  17. * KIND, either express or implied. See the License for the
  18. * specific language governing permissions and limitations
  19. * under the License.
  20. *
  21. */
  22. include_once '../util/Logger.php';
  23. include_once '../conf/Config.inc';
  24. include_once 'localDirs.php';
  25. include_once "../util/lock.php";
  26. include_once '../db/HMCDBAccessor.php';
  27. include_once 'commandUtils.php';
  28. include_once '../util/util.php';
  // ---------------------------------------------------------------------
  // Script setup: stage names, shared services, command-line arguments and
  // the (freshly re-created) output directory for this stage.
  // ---------------------------------------------------------------------

  // This stage and the stage whose per-node output it consumes.
  $stageName = "verifyAndUpdateNodesInfo";
  $prevStageName = "obtainNodesInfo";

  // Marker that separates the output of consecutive commands in a node's
  // .out file; in the parsing loop it is followed by the command's exit code.
  $lineSeparatorPattern = "HDP-------HDP";

  $logger = new HMCLogger("verifyAndUpdateNodesInfo");
  $dbAccessor = new HMCDBAccessor($GLOBALS["DB_PATH"]);

  // Positional arguments 1..6; $argv[0] (the script path) is skipped.
  // $deployUser and $rootTxnId are read here but not referenced again in
  // this script.
  list(
    ,
    $clusterName,
    $deployUser,
    $rootTxnId,
    $mySubTxnId,
    $parentSubTxnId,
    $readFromFile
  ) = $argv;

  // Directories are built by plain concatenation, so getClusterDir() is
  // presumably expected to return a path ending in "/" - TODO confirm.
  $clusterDir = getClusterDir($clusterName);
  $prevOutputDir = $clusterDir . $prevStageName . "/";
  $outputDir = $clusterDir . $stageName . "/";
  $logger->log_debug("OutputDir is : $outputDir");

  // Start from an empty output directory on every run.
  if (is_dir($outputDir)) {
    rrmdir($outputDir);
  }
  mkdir($outputDir);

  // Accumulators filled by the discovery loop further down.
  $allHosts = array();
  $allBadHosts = array();
  /**
   * Records the outcome of this stage for one node as files under $outDir.
   *
   * Writes "<node>.done" and "<node>.out", both containing $status, and, when
   * $status is non-zero, "<node>.err" containing $error.
   *
   * A failure on the .done file or on opening any file returns immediately.
   * A failed write to the .out file is remembered but the .err file is still
   * attempted, so as much information as possible reaches the disk; the
   * remembered failure is never turned back into a success.
   *
   * @param string $outDir   Prefix the file names are appended to; it is
   *                         concatenated as-is, so it must end in a separator.
   * @param string $nodeName Node name used as the base name of the files.
   * @param mixed  $status   Status to record; a value loosely equal to 0 means
   *                         success and suppresses the .err file.
   * @param string $error    Text stored in the .err file (may be empty).
   * @return array 'success' => bool, plus 'reason' => string when it is FALSE.
   */
  function updateStatusForNode ($outDir, $nodeName, $status, $error = "")
  {
    global $logger;

    $outArray = array('success' => TRUE);
    $doneFileName = $outDir . $nodeName . ".done";
    $outFileName = $outDir . $nodeName . ".out";
    $errFileName = $outDir . $nodeName . ".err";
    $statusText = (string) $status;
    $errorText = (string) $error;

    $logger->log_debug("done file name is $doneFileName");

    // --- <node>.done ---------------------------------------------------
    $fd = fopen($doneFileName, "w");
    if ($fd === FALSE) {
      $outArray['success'] = FALSE;
      $outArray['reason'] = "Failed to update done status: $status for node: $nodeName";
      return $outArray;
    }
    $retval = fwrite($fd, $statusText);
    // Close before inspecting the result so the handle is released on the
    // failure path as well.
    fclose($fd);
    // Compare against the expected length rather than against FALSE loosely:
    // writing an empty string legitimately returns 0.
    if ($retval === FALSE || $retval < strlen($statusText)) {
      $outArray['success'] = FALSE;
      $outArray['reason'] = "Failed to write done status: $status for node: $nodeName";
      return $outArray;
    }

    // --- <node>.out ----------------------------------------------------
    $fd = fopen($outFileName, "w");
    if ($fd === FALSE) {
      $outArray['success'] = FALSE;
      $outArray['reason'] = "Failed to write out status: $status for node: $nodeName";
      return $outArray;
    }
    $retval = fwrite($fd, $statusText);
    fclose($fd);
    if ($retval === FALSE || $retval < strlen($statusText)) {
      // Remember the failure but keep going so the error text is still saved.
      $outArray['success'] = FALSE;
      $outArray['reason'] = "Failed to write out status: $status for node: $nodeName";
    }

    // --- <node>.err (only for a non-zero status) -------------------------
    if ($status != 0) {
      $fd = fopen($errFileName, "w");
      if ($fd === FALSE) {
        $outArray['success'] = FALSE;
        $outArray['reason'] = "Failed to write err info: $error for node: $nodeName";
        return $outArray;
      }
      $retval = fwrite($fd, $errorText);
      fclose($fd);
      if ($retval === FALSE || $retval < strlen($errorText)) {
        $outArray['success'] = FALSE;
        $outArray['reason'] = "Failed to write err info: $error for node: $nodeName";
      }
    }

    return $outArray;
  }
  /**
   * Marks a node as failed by recording status 255 together with $error.
   *
   * @param string $outDir   Directory prefix handed on to updateStatusForNode().
   * @param string $nodeName Node whose status files are written.
   * @param string $error    Error text to store for the node.
   * @return array The result array produced by updateStatusForNode().
   */
  function updateFailedStatusForNode ($outDir, $nodeName, $error)
  {
    // 255 is the status value this script uses for every failed node.
    $failureStatus = 255;
    $result = updateStatusForNode($outDir, $nodeName, $failureStatus, $error);
    return $result;
  }
  /**
   * Marks a node as successfully verified by recording status 0.
   *
   * @param string $outDir   Directory prefix handed on to updateStatusForNode().
   * @param string $nodeName Node whose status files are written.
   * @return array The result array produced by updateStatusForNode().
   */
  function updateSuccessStatusForNode ($outDir, $nodeName)
  {
    // Status 0 means success; the error argument keeps its default value.
    $successStatus = 0;
    $result = updateStatusForNode($outDir, $nodeName, $successStatus);
    return $result;
  }
  /**
   * Maps the number of discovery commands that succeeded before the first
   * failure to a human-readable reason.
   *
   * @param int $count Zero-based index of the command that failed.
   * @return string Reason text; a generic message for an unlisted index.
   */
  function getBadNodeReason ($count)
  {
    // Ordered by command index; the position in this table is the index.
    $reasonsByIndex = array(
      0 => "Failed to get memory info",
      1 => "Failed to get cpu count info",
      2 => "Failed to get OS architecture",
      3 => "Failed to get mount point info",
      4 => "Failed to get OS distribution type",
      5 => "Failed to get OS related information",
      6 => "Failed to get IP address",
      7 => "Failed to get public FQDN",
      8 => "Failed to get private FQDN",
    );

    // Scan in order with a loose comparison, which is how a switch statement
    // matches its cases.
    foreach ($reasonsByIndex as $index => $reason) {
      if ($count == $index) {
        return $reason;
      }
    }

    return "Unknown error in host discovery";
  }
  /**
   * Stores one line of discovery output in the host array, in the field that
   * corresponds to the command currently being parsed.
   *
   * $count is the number of commands whose output has already been consumed,
   * i.e. the zero-based index of the command this line belongs to:
   *   0 totalMem, 1 cpuCount, 2 osArch, 3 disksInfo (one entry per line),
   *   4 osType (accumulated over lines), 5 os (lines joined with ";"),
   *   6 ip, 7 attributes.publicFQDN, 8 attributes.privateFQDN.
   * Any other index leaves the array untouched.
   *
   * @param string $line  Raw line read from the node's .out file.
   * @param int    $count Index of the command the line belongs to.
   * @param array  $arr   Host array built so far.
   * @return array The host array with the line applied.
   */
  function populateVal ($line, $count, $arr)
  {
    $value = trim($line);

    switch ($count)
    {
      case 0:
        $arr["totalMem"] = $value;
        break;
      case 1:
        $arr["cpuCount"] = $value;
        break;
      case 2:
        $arr["osArch"] = $value;
        break;
      case 3:
        $arr["disksInfo"][] = $value;
        break;
      case 4:
        if (!isset($arr["osType"])) {
          $arr["osType"] = "";
        }
        $lline = strtolower($value);
        // The literal word "release" contributes nothing to the OS type.
        if ($lline == "release") {
          break;
        }
        // A line containing a version number is reduced to its leading integer
        // part (e.g. "6.3" becomes "6"), so "CentOS" / "release" / "6.3" on
        // successive lines yields "centos6".
        $matches = array();
        if (preg_match("/([0-9]+)(\.[0-9]+)?/", $lline, $matches) > 0) {
          $lline = $matches[1];
        }
        $arr["osType"] .= $lline;
        break;
      case 5:
        if (!isset($arr["os"])) {
          $arr["os"] = "";
        }
        if ($arr["os"] != "") {
          $arr["os"] .= ";";
        }
        $arr["os"] .= $value;
        break;
      case 6:
        $arr["ip"] = $value;
        break;
      case 7:
        if (!isset($arr["attributes"])) {
          $arr["attributes"] = array();
        }
        $arr["attributes"]["publicFQDN"] = strtolower($value);
        // Without this break the public name also landed in privateFQDN.
        break;
      case 8:
        if (!isset($arr["attributes"])) {
          $arr["attributes"] = array();
        }
        $arr["attributes"]["privateFQDN"] = strtolower($value);
        break;
      default:
        break;
    }

    return $arr;
  }
  // ---------------------------------------------------------------------
  // Walk the previous stage's output directory. For every "<node>.out" file:
  //   1. check the matching "<node>.done" file for a zero exit code,
  //   2. parse the .out file into a host array,
  //   3. reject hosts whose OS type is not in the supported list,
  //   4. write this stage's per-node status files under $outputDir,
  //   5. append the host array (good or bad) to $allHosts.
  // $finalOpStatus becomes "FAILED" as soon as any node fails.
  // ---------------------------------------------------------------------
  $finalOpStatus = "SUCCESS";
  $failedCount = 0;
  $successCount = 0;
  if ($dirHandle = opendir($prevOutputDir)) {
    while (false !== ($entry = readdir($dirHandle))) {
      if ($entry == "." || $entry == "..") {
        continue;
      }
      // Only consider files whose name ends in ".out". The dot is escaped and
      // the pattern anchored so that names which merely contain "out" (for
      // example "scout.done" or "host.out.bak") are not mistaken for results.
      if (!preg_match('/\.out$/', $entry)) {
        continue;
      }
      $nodeName = basename($entry, ".out");
      $nodeStatus = "SUCCESS";
      $doneFile = $prevOutputDir . $nodeName . ".done";
      if (file_exists($doneFile)) {
        // The done-file is expected to contain "0" for a successful node.
        $doneFileContents = file_get_contents($doneFile);
        if (trim($doneFileContents) != "0") {
          $failedCount += 1;
          $nodeStatus = "FAILED";
          $finalOpStatus = "FAILED";
          updateFailedStatusForNode($outputDir, $nodeName,
            "Command to discover node information failed, exit_code=" . $doneFileContents);
          $logger->log_debug( "Contents of done file for $clusterName : $doneFileContents");
        }
      } else {
        $failedCount += 1;
        $nodeStatus = "FAILED";
        $finalOpStatus = "FAILED";
        updateFailedStatusForNode($outputDir, $nodeName,
          "Command to discover node information failed, no exit code found");
        $logger->log_debug("Update failed because file contents of $doneFile is empty");
      }

      // Initialize this host's array with neutral defaults so that failed
      // hosts can still be persisted.
      $thisHostArray = array();
      $thisHostArray["hostName"] = strtolower($nodeName);
      $thisHostArray["totalMem"] = 0;
      $thisHostArray["cpuCount"] = 0;
      $thisHostArray["osArch"] = "";
      $thisHostArray["disksInfo"] = array();
      $thisHostArray["osType"] = "";
      $thisHostArray["os"] = "";
      $thisHostArray["ip"] = $nodeName; // To be unique
      $thisHostArray["attributes"] = array();

      if ($nodeStatus != "FAILED") {
        // Parse the file for the contents we need. Each command's output is
        // followed by a separator line carrying its exit value; any exit
        // value other than 0 marks the host as bad.
        $hostOutFd = fopen($prevOutputDir.$entry, "r");
        if ($hostOutFd === FALSE) {
          $logger->log_error("Failed to open file to read: ". $prevOutputDir.$entry);
          $failedCount += 1;
          $thisHostArray["badHealthReason"] = "No data obtained for host";
          $finalOpStatus = "FAILED";
          $nodeStatus = "FAILED";
          updateFailedStatusForNode($outputDir, $nodeName,
            $thisHostArray["badHealthReason"]);
        } else {
          // Number of commands seen so far that exited with 0; it doubles as
          // the index of the command whose output is currently being read.
          $goodReturnValCount = 0;
          while (!feof($hostOutFd)) {
            $line = fgets($hostOutFd, 4096);
            if ($line === FALSE) {
              // Nothing more to read (EOF or read error); do not feed a
              // non-line into the parser.
              break;
            }
            if (preg_match("/".$lineSeparatorPattern."0/", $line)) {
              $goodReturnValCount += 1;
            } else if (preg_match("/".$lineSeparatorPattern."/", $line)) {
              // Separator with a non-zero exit value: this node is bad.
              $failedCount += 1;
              $thisHostArray["badHealthReason"] = getBadNodeReason($goodReturnValCount);
              $finalOpStatus = "FAILED";
              $nodeStatus = "FAILED";
              updateFailedStatusForNode($outputDir, $nodeName,
                $thisHostArray["badHealthReason"]);
              // Remember the bad host in the list declared during setup
              // (the previous target variable was never defined).
              array_push($allBadHosts, $thisHostArray);
              break;
            } else {
              $thisHostArray = populateVal($line, $goodReturnValCount, $thisHostArray);
            }
          }
          fclose($hostOutFd);
        }

        if ($nodeStatus == "SUCCESS") {
          // Only these OS type strings are accepted.
          if ($thisHostArray["osType"] != "redhatenterpriselinuxserver5"
              && $thisHostArray["osType"] != "centos5"
              && $thisHostArray["osType"] != "redhatenterpriselinuxserver6"
              && $thisHostArray["osType"] != "centos6") {
            $failedCount += 1;
            $thisHostArray["badHealthReason"] = "Unsupported OS";
            $finalOpStatus = "FAILED";
            $nodeStatus = "FAILED";
            updateFailedStatusForNode($outputDir, $nodeName,
              $thisHostArray["badHealthReason"]);
          }
        }

        if ($nodeStatus == "SUCCESS") {
          $successCount += 1;
          updateSuccessStatusForNode($outputDir, $nodeName);
        }
      }

      $thisHostArray["discoveryStatus"] = $nodeStatus;
      // The disk list is stored as a JSON string.
      $thisHostArray["disksInfo"] = json_encode($thisHostArray["disksInfo"]);
      array_push($allHosts, $thisHostArray);
    }
    closedir($dirHandle);
  } else {
    // No nodes can be processed; $successCount stays 0.
    $logger->log_error("Failed to open previous stage output dir: " . $prevOutputDir);
  }
  // ---------------------------------------------------------------------
  // Persist the discovered data to the db, rewrite the hosts file so that it
  // lists only successfully discovered hosts, and record the final status of
  // this sub-transaction.
  // ---------------------------------------------------------------------
  $logger->log_info("Going to persist discovered node properties");
  $returnValue = $dbAccessor->addHostsToCluster($clusterName, $allHosts);
  if (0 != $returnValue["result"]) {
    // Report the db error as JSON on stdout and stop the script.
    $logger->log_error("Got error while adding hosts: ".$returnValue["error"]);
    print json_encode($returnValue);
    return;
  }

  // Not a single usable node: escalate whatever status the loop produced.
  $finalOpStatus = ($successCount == 0) ? "TOTALFAILURE" : $finalOpStatus;

  $hostsFileHandle = fopen($readFromFile, "w");
  if ($hostsFileHandle == FALSE) {
    // The hosts file cannot be rewritten; mark the sub-transaction as a
    // total failure and stop.
    $subTxnResult = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
    $logger->log_error("Got error while trying to rewrite hosts file");
    return;
  }

  // One host name per line, skipping every host whose discovery failed.
  foreach ($allHosts as $discoveredHost) {
    if ($discoveredHost["discoveryStatus"] != "FAILED") {
      fwrite($hostsFileHandle, $discoveredHost["hostName"]."\n");
    }
  }
  fclose($hostsFileHandle);

  $subTxnResult = $dbAccessor->updateSubTransactionOpStatus($clusterName, $parentSubTxnId, $mySubTxnId, $finalOpStatus);
  if (0 != $subTxnResult["result"]) {
    $logger->log_error("Got error while updating subTxn: ".$subTxnResult["error"]);
    print json_encode($subTxnResult);
    return;
  }
  332. ?>