findSshableNodes.php 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. <?php
  2. include_once '../util/Logger.php';
  3. include_once '../conf/Config.inc';
  4. include_once 'localDirs.php';
  5. include_once "../util/lock.php";
  6. include_once '../db/HMCDBAccessor.php';
  7. include_once "../util/util.php";
  8. include_once "../util/HMCTxnUtils.php";
  9. include_once 'commandUtils.php';
  // Script-wide collaborators: a logger labelled with this script's name and
  // the accessor for the database whose path is configured in DB_PATH.
  $logger = new HMCLogger('findSshableNodes');
  $dbAccessor = new HMCDBAccessor($GLOBALS['DB_PATH']);
  /**
   * Returns the shell command line that this script hands to runPdsh().
   *
   * The command simply prints the remote machine's hostname.
   *
   * @return string the shell command line, terminated by ";"
   */
  function getCommandLine() {
    $probeCommand = "hostname ;";
    // For demos, a random 0-4 second delay can be appended by enabling:
    // $probeCommand = $probeCommand . 'sleep $[ $RANDOM % 5 ]; ';
    return $probeCommand;
  }
  // Positional command-line arguments. $argv[0] (the script path) is skipped.
  //   1: cluster name        2: deploy user         3: root transaction id
  //   4: this sub-txn id     5: parent sub-txn id   6: path of the hosts file
  list(, $clusterName, $deployUser, $rootTxnId, $mySubTxnId, $parentSubTxnId, $readFromFile) = $argv;

  // Record that this sub-transaction has started before doing any work.
  $opStatus = "STARTED";
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus(
    $clusterName, $parentSubTxnId, $mySubTxnId, $opStatus);

  // A non-zero "result" signals an error: log it, echo the accessor's
  // response as JSON and stop the script.
  $startRecorded = ($subTransactionReturnValue["result"] == 0);
  if (!$startRecorded) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }
  // Stage name; it is passed to runPdsh() and is also the name of the
  // directory (under the cluster directory) that is scanned below.
  $stageName = "findSshableNodes";
  $cmdLine = getCommandLine();
  runPdsh($clusterName, $stageName, $deployUser, $readFromFile, $cmdLine);

  // Read the per-host result files to decide which nodes are ssh-able.
  // Files consulted for each host:
  //   <host>.out  - command output; a missing or empty file marks the host FAILED
  //   <host>.done - must exist and contain "0" (presumably the exit status),
  //                 otherwise the host is marked FAILED
  //   <host>.err  - its contents become the bad-health reason for FAILED hosts
  $clusterDir = getClusterDir($clusterName);
  $commandOutputDir = $clusterDir . $stageName . "/";

  $allHosts = array();
  $finalOpStatus = "SUCCESS";
  $numNodesSucceeded = 0;
  $numNodesFailed = 0;

  $dirHandle = opendir($commandOutputDir);
  if ($dirHandle === false) {
    // With no readable output directory no host can be confirmed; $allHosts
    // stays empty and $numNodesSucceeded stays 0.
    $logger->log_error("Could not open command output directory: " . $commandOutputDir);
  } else {
    while (false !== ($entry = readdir($dirHandle))) {
      // Only consider files whose name ends in ".out". The pattern is anchored
      // and the dot escaped so that names merely containing "<char>out"
      // (e.g. "scout01.err") are not mistaken for output files. "." and ".."
      // do not match either, so they need no separate check.
      if (!preg_match('/^.+\.out$/', $entry)) {
        continue;
      }

      $nodeName = basename($entry, ".out");
      $nodeStatus = "SUCCESS";

      // The done-file must exist and hold exactly "0" (ignoring whitespace).
      $doneFile = $commandOutputDir . $nodeName . ".done";
      if (file_exists($doneFile)) {
        $doneFileContents = file_get_contents($doneFile);
        if ($doneFileContents === false || trim($doneFileContents) !== "0") {
          $nodeStatus = "FAILED";
          $logger->log_debug( "Contents of done file for $clusterName : $doneFileContents");
        }
      } else {
        $nodeStatus = "FAILED";
      }

      // A host whose command produced no output (or whose output file cannot
      // be read) is treated as not ssh-able.
      if ($nodeStatus !== "FAILED") {
        $sshContents = file_get_contents($commandOutputDir . $entry);
        if ($sshContents === false || $sshContents === "") {
          $nodeStatus = "FAILED";
        }
      }

      // Initialize this host's record with placeholder hardware/OS details.
      $thisHostArray = array();
      $thisHostArray["hostName"] = strtolower($nodeName);
      $thisHostArray["totalMem"] = 0;
      $thisHostArray["cpuCount"] = 0;
      $thisHostArray["osArch"] = "";
      $thisHostArray["disksInfo"] = json_encode(array());
      $thisHostArray["osType"] = "";
      $thisHostArray["os"] = "";
      $thisHostArray["ip"] = $nodeName; // To be unique

      if ($nodeStatus !== "FAILED") {
        $numNodesSucceeded += 1;
      } else {
        $numNodesFailed += 1;
        $finalOpStatus = "FAILED";

        // The .err file may be missing or unreadable; fall back to an empty
        // reason instead of triggering a warning.
        $errFile = $commandOutputDir . $nodeName . ".err";
        $errContents = file_exists($errFile) ? file_get_contents($errFile) : false;
        $thisHostArray["badHealthReason"] = ($errContents === false) ? "" : rtrim($errContents);
      }

      $thisHostArray["discoveryStatus"] = $nodeStatus;
      array_push($allHosts, $thisHostArray);
    }
    closedir($dirHandle);
  }
  // Persist the collected host records to the database.
  $logger->log_debug("Going to persist information sshAble nodes");
  $returnValue = $dbAccessor->addHostsToCluster($clusterName, $allHosts);
  $hostsPersisted = ($returnValue["result"] == 0);
  if (!$hostsPersisted) {
    $logger->log_error("Got error while adding hosts: ".$returnValue["error"]);
    print json_encode($returnValue);
    return;
  }

  // If not a single node succeeded, the whole operation is a total failure.
  $finalOpStatus = ($numNodesSucceeded == 0) ? "TOTALFAILURE" : $finalOpStatus;

  // Rewrite the hosts file so that it lists only the hosts that were
  // discovered successfully, one host name per line.
  $nodeFileOut = fopen($readFromFile, "w");
  if (!$nodeFileOut) {
    $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus(
      $clusterName, $parentSubTxnId, $mySubTxnId, "TOTALFAILURE");
    $logger->log_error("Got error while trying to rewrite hosts file");
    return;
  }
  foreach ($allHosts as $host) {
    if ($host["discoveryStatus"] != "FAILED") {
      fwrite($nodeFileOut, $host["hostName"]."\n");
    }
  }
  fclose($nodeFileOut);

  // Publish the final status of this sub-transaction.
  $subTransactionReturnValue = $dbAccessor->updateSubTransactionOpStatus(
    $clusterName, $parentSubTxnId, $mySubTxnId, $finalOpStatus);
  $statusRecorded = ($subTransactionReturnValue["result"] == 0);
  if (!$statusRecorded) {
    $logger->log_error("Got error while updating subTxn: ".$subTransactionReturnValue["error"]);
    print json_encode($subTransactionReturnValue);
    return;
  }
  132. ?>