rpc_engine.cc

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "rpc_engine.h"
#include "rpc_connection.h"
#include "common/util.h"
#include "common/logging.h"
#include "common/namenode_info.h"
#include "optional.hpp"

#include <algorithm>
#include <future>

namespace hdfs {

template <class T>
using optional = std::experimental::optional<T>;
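
// HANamenodeTracker keeps the resolved active/standby NameNode pair for an HA
// nameservice and swaps them when a failover is detected.  It only enables
// itself when the configuration declares at least two NameNodes.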
HANamenodeTracker::HANamenodeTracker(const std::vector<ResolvedNamenodeInfo> &servers,
                                     ::asio::io_service *ioservice,
                                     std::shared_ptr<LibhdfsEvents> event_handlers)
    : enabled_(false), resolved_(false),
      ioservice_(ioservice), event_handlers_(event_handlers)
{
  LOG_TRACE(kRPC, << "HANamenodeTracker got the following nodes");
  for(unsigned int i=0; i<servers.size(); i++)
    LOG_TRACE(kRPC, << servers[i].str());

  if(servers.size() >= 2) {
    LOG_TRACE(kRPC, << "Creating HA namenode tracker");
    if(servers.size() > 2) {
      LOG_WARN(kRPC, << "Nameservice declares more than two nodes.  Some won't be used.");
    }

    active_info_ = servers[0];
    standby_info_ = servers[1];
    LOG_INFO(kRPC, << "Active namenode url  = " << active_info_.uri.str());
    LOG_INFO(kRPC, << "Standby namenode url = " << standby_info_.uri.str());

    enabled_ = true;
    if(!active_info_.endpoints.empty() || !standby_info_.endpoints.empty()) {
      resolved_ = true;
    }
  }
}

HANamenodeTracker::~HANamenodeTracker() {}
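
// Renders a list of endpoints as a comma-separated string for log and error
// messages.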
static std::string format_endpoints(const std::vector<::asio::ip::tcp::endpoint> &pts) {
  std::stringstream ss;
  for(unsigned int i=0; i<pts.size(); i++)
    if(i == pts.size() - 1)
      ss << pts[i];
    else
      ss << pts[i] << ", ";
  return ss.str();
}

// Pass in the endpoint of the current (failed) connection; this does a reverse
// lookup to determine which node that was and returns the info for the other
// node to fail over to.  It also swaps its internal active/standby state.
ResolvedNamenodeInfo HANamenodeTracker::GetFailoverAndUpdate(::asio::ip::tcp::endpoint current_endpoint) {
  LOG_TRACE(kRPC, << "Swapping from endpoint " << current_endpoint);
  mutex_guard swap_lock(swap_lock_);

  ResolvedNamenodeInfo failover_node;

  if(IsCurrentActive_locked(current_endpoint)) {
    // Connected to the active: promote the standby and fail over to it.
    std::swap(active_info_, standby_info_);
    if(event_handlers_)
      event_handlers_->call(FS_NN_FAILOVER_EVENT, active_info_.nameservice.c_str(),
                            reinterpret_cast<int64_t>(active_info_.uri.str().c_str()));
    failover_node = active_info_;
  } else if(IsCurrentStandby_locked(current_endpoint)) {
    // Connected to the standby: no swap needed, just fail over to the active.
    if(event_handlers_)
      event_handlers_->call(FS_NN_FAILOVER_EVENT, active_info_.nameservice.c_str(),
                            reinterpret_cast<int64_t>(active_info_.uri.str().c_str()));
    failover_node = active_info_;
  } else {
    // Invalid state (endpoint matches neither node), throw for testing
    std::string ep1 = format_endpoints(active_info_.endpoints);
    std::string ep2 = format_endpoints(standby_info_.endpoints);
    std::stringstream msg;
    msg << "Looked for " << current_endpoint << " in\n";
    msg << ep1 << " and\n";
    msg << ep2 << std::endl;
    LOG_ERROR(kRPC, << "Unable to find RPC connection in config " << msg.str() << ". Bailing out.");
    throw std::runtime_error(msg.str());
  }

  if(failover_node.endpoints.empty()) {
    LOG_WARN(kRPC, << "No endpoints for node " << failover_node.uri.str() << ", attempting to resolve again");
    if(!ResolveInPlace(ioservice_, failover_node)) {
      LOG_ERROR(kRPC, << "Fallback endpoint resolution for node " << failover_node.uri.str()
                      << " failed.  Please make sure your configuration is up to date.");
    }
  }
  return failover_node;
}
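
// The _locked helpers below expect the caller to already hold swap_lock_.
// They match by address only, tolerating a port mismatch so that a node
// reachable on a nonstandard port is still recognized.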
bool HANamenodeTracker::IsCurrentActive_locked(const ::asio::ip::tcp::endpoint &ep) const {
  for(unsigned int i=0; i<active_info_.endpoints.size(); i++) {
    if(ep.address() == active_info_.endpoints[i].address()) {
      if(ep.port() != active_info_.endpoints[i].port())
        LOG_WARN(kRPC, << "Port mismatch: " << ep << " vs " << active_info_.endpoints[i] << ", trying anyway..");
      return true;
    }
  }
  return false;
}

bool HANamenodeTracker::IsCurrentStandby_locked(const ::asio::ip::tcp::endpoint &ep) const {
  for(unsigned int i=0; i<standby_info_.endpoints.size(); i++) {
    if(ep.address() == standby_info_.endpoints[i].address()) {
      if(ep.port() != standby_info_.endpoints[i].port())
        LOG_WARN(kRPC, << "Port mismatch: " << ep << " vs " << standby_info_.endpoints[i] << ", trying anyway..");
      return true;
    }
  }
  return false;
}
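
// RpcEngine ties together a single RpcConnection, the retry/failover policy,
// and the HA tracker.  State transitions are serialized by engine_state_lock_.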
RpcEngine::RpcEngine(::asio::io_service *io_service, const Options &options,
                     const std::string &client_name, const std::string &user_name,
                     const char *protocol_name, int protocol_version)
    : io_service_(io_service),
      options_(options),
      client_name_(client_name),
      protocol_name_(protocol_name),
      protocol_version_(protocol_version),
      call_id_(0),
      retry_timer(*io_service),
      event_handlers_(std::make_shared<LibhdfsEvents>())
{
  LOG_DEBUG(kRPC, << "RpcEngine::RpcEngine called");

  auth_info_.setUser(user_name);
  if (options.authentication == Options::kKerberos) {
    auth_info_.setMethod(AuthInfo::kKerberos);
  }
}
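
// Sets up the HA tracker (when the nameservice declares multiple NameNodes),
// chooses a retry policy to match, and kicks off the initial connection to
// the first server's endpoints.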
void RpcEngine::Connect(const std::string &cluster_name,
                        const std::vector<ResolvedNamenodeInfo> servers,
                        RpcCallback &handler) {
  std::lock_guard<std::mutex> state_lock(engine_state_lock_);
  LOG_DEBUG(kRPC, << "RpcEngine::Connect called");

  last_endpoints_ = servers[0].endpoints;
  cluster_name_ = cluster_name;
  LOG_TRACE(kRPC, << "Got cluster name \"" << cluster_name << "\" in RpcEngine::Connect");

  ha_persisted_info_.reset(new HANamenodeTracker(servers, io_service_, event_handlers_));
  if(!ha_persisted_info_->is_enabled()) {
    ha_persisted_info_.reset();
  }

  // Construct retry policy after we determine if config is HA
  retry_policy_ = MakeRetryPolicy(options_);

  conn_ = InitializeConnection();
  conn_->Connect(last_endpoints_, auth_info_, handler);
}
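
// Teardown is posted to the io_service so the connection is destroyed on the
// event loop thread rather than on whichever thread called Shutdown().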
void RpcEngine::Shutdown() {
  LOG_DEBUG(kRPC, << "RpcEngine::Shutdown called");
  io_service_->post([this]() {
    std::lock_guard<std::mutex> state_lock(engine_state_lock_);
    conn_.reset();
  });
}
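
// Picks a retry policy based on the current configuration: failover-aware
// fixed delay for HA clusters, plain fixed delay when retries are enabled,
// and no policy (nullptr) otherwise.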
std::unique_ptr<const RetryPolicy> RpcEngine::MakeRetryPolicy(const Options &options) {
  LOG_DEBUG(kRPC, << "RpcEngine::MakeRetryPolicy called");

  if(ha_persisted_info_) {
    LOG_INFO(kRPC, << "Cluster is HA configured so policy will default to HA until a knob is implemented");
    return std::unique_ptr<RetryPolicy>(new FixedDelayWithFailover(options.rpc_retry_delay_ms,
                                                                   options.max_rpc_retries,
                                                                   options.failover_max_retries,
                                                                   options.failover_connection_max_retries));
  } else if (options.max_rpc_retries > 0) {
    return std::unique_ptr<RetryPolicy>(new FixedDelayRetryPolicy(options.rpc_retry_delay_ms,
                                                                  options.max_rpc_retries));
  } else {
    return nullptr;
  }
}
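
// The TEST_ methods below are hooks for unit tests to inject a mock
// connection or a canned retry policy; they are not part of the production
// code path.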
void RpcEngine::TEST_SetRpcConnection(std::shared_ptr<RpcConnection> conn) {
  conn_ = conn;
  retry_policy_ = MakeRetryPolicy(options_);
}

void RpcEngine::TEST_SetRetryPolicy(std::unique_ptr<const RetryPolicy> policy) {
  retry_policy_ = std::move(policy);
}

std::unique_ptr<const RetryPolicy> RpcEngine::TEST_GenerateRetryPolicyUsingOptions() {
  return MakeRetryPolicy(options_);
}
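
// Queues an RPC on the current connection, lazily creating and connecting a
// new one (to the last known endpoints) if none exists.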
void RpcEngine::AsyncRpc(
    const std::string &method_name, const ::google::protobuf::MessageLite *req,
    const std::shared_ptr<::google::protobuf::MessageLite> &resp,
    const std::function<void(const Status &)> &handler) {
  std::lock_guard<std::mutex> state_lock(engine_state_lock_);
  LOG_TRACE(kRPC, << "RpcEngine::AsyncRpc called");

  if (!conn_) {
    conn_ = InitializeConnection();
    conn_->ConnectAndFlush(last_endpoints_);
  }
  conn_->AsyncRpc(method_name, req, resp, handler);
}
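
// Synchronous wrapper around AsyncRpc: bridges the callback to a
// promise/future pair and blocks the calling thread until the response (or
// failure) arrives.  Note that blocking here from the event loop thread of a
// single-threaded io_service would deadlock, since the same thread must run
// the completion handler.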
Status RpcEngine::Rpc(
    const std::string &method_name, const ::google::protobuf::MessageLite *req,
    const std::shared_ptr<::google::protobuf::MessageLite> &resp) {
  LOG_TRACE(kRPC, << "RpcEngine::Rpc called");

  auto stat = std::make_shared<std::promise<Status>>();
  std::future<Status> future(stat->get_future());
  AsyncRpc(method_name, req, resp,
           [stat](const Status &status) { stat->set_value(status); });
  return future.get();
}
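
// NewConnection constructs a TCP-backed RpcConnection; InitializeConnection
// additionally wires up the event handlers and cluster name before the
// connection is handed out.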
std::shared_ptr<RpcConnection> RpcEngine::NewConnection() {
  LOG_DEBUG(kRPC, << "RpcEngine::NewConnection called");
  return std::make_shared<RpcConnectionImpl<::asio::ip::tcp::socket>>(this);
}

std::shared_ptr<RpcConnection> RpcEngine::InitializeConnection() {
  std::shared_ptr<RpcConnection> result = NewConnection();
  result->SetEventHandlers(event_handlers_);
  result->SetClusterName(cluster_name_);
  return result;
}
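
// Called by the connection when communication fails; defers the real work to
// the event loop so it does not run while the caller still holds its own
// locks.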
void RpcEngine::AsyncRpcCommsError(
    const Status &status,
    std::shared_ptr<RpcConnection> failedConnection,
    std::vector<std::shared_ptr<Request>> pendingRequests) {
  LOG_ERROR(kRPC, << "RpcEngine::AsyncRpcCommsError called; status=\"" << status.ToString()
                  << "\" conn=" << failedConnection.get() << " reqs=" << pendingRequests.size());

  io_service().post([this, status, failedConnection, pendingRequests]() {
    RpcCommsError(status, failedConnection, pendingRequests);
  });
}
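
// The recovery path: drop the failed connection, consult the retry policy for
// each pending request (failing those that are out of retries), fail over to
// the other NameNode on a StandbyException, then reconnect and resend
// whatever survived, optionally after the policy's delay.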
void RpcEngine::RpcCommsError(
    const Status &status,
    std::shared_ptr<RpcConnection> failedConnection,
    std::vector<std::shared_ptr<Request>> pendingRequests) {
  LOG_WARN(kRPC, << "RpcEngine::RpcCommsError called; status=\"" << status.ToString()
                 << "\" conn=" << failedConnection.get() << " reqs=" << pendingRequests.size());

  std::lock_guard<std::mutex> state_lock(engine_state_lock_);

  // If the failed connection is the current one, shut it down.
  // It will be reconnected when there is work to do.
  if (failedConnection == conn_) {
    LOG_INFO(kRPC, << "Disconnecting from failed RpcConnection");
    conn_.reset();
  }

  optional<RetryAction> head_action = optional<RetryAction>();

  // We are talking to the standby NN, let's talk to the active one instead.
  if(ha_persisted_info_ && status.get_server_exception_type() == Status::kStandbyException) {
    LOG_INFO(kRPC, << "Received StandbyException.  Failing over.");
    head_action = RetryAction::failover(std::max(0, options_.rpc_retry_delay_ms));
  } else {
    // Filter out anything with too many retries already
    for (auto it = pendingRequests.begin(); it != pendingRequests.end();) {
      auto req = *it;
      LOG_DEBUG(kRPC, << req->GetDebugString());

      RetryAction retry = RetryAction::fail(""); // Default to fail
      if (retry_policy()) {
        retry = retry_policy()->ShouldRetry(status, req->IncrementRetryCount(), req->get_failover_count(), true);
      }

      if (retry.action == RetryAction::FAIL) {
        // If we've exceeded the maximum retries, take the latest error and pass
        // it on.  There might be a good argument for caching the first error
        // rather than the last one, but that gets messy.
        io_service().post([req, status]() {
          req->OnResponseArrived(nullptr, status); // Never call back while holding a lock
        });
        it = pendingRequests.erase(it);
      } else {
        if (!head_action) {
          head_action = retry;
        }
        ++it;
      }
    }
  }

  // If we have requests that need to be re-sent, ensure that we have a connection
  // and send the requests to it
  bool haveRequests = !pendingRequests.empty() &&
                      head_action && head_action->action != RetryAction::FAIL;

  if (haveRequests) {
    LOG_TRACE(kRPC, << "Have " << pendingRequests.size() << " requests to resend");
    bool needNewConnection = !conn_;
    if (needNewConnection) {
      LOG_DEBUG(kRPC, << "Creating a new NN connection");

      // If HA is enabled and we have valid HA info then fail over to the standby (hopefully now active)
      if(head_action->action == RetryAction::FAILOVER_AND_RETRY && ha_persisted_info_) {
        for(unsigned int i=0; i<pendingRequests.size(); i++)
          pendingRequests[i]->IncrementFailoverCount();

        ResolvedNamenodeInfo new_active_nn_info =
            ha_persisted_info_->GetFailoverAndUpdate(last_endpoints_[0] /*reverse lookup*/);

        LOG_INFO(kRPC, << "Going to try connecting to alternate NameNode: " << new_active_nn_info.uri.str());

        if(ha_persisted_info_->is_resolved()) {
          last_endpoints_ = new_active_nn_info.endpoints;
        } else {
          LOG_WARN(kRPC, << "It looks like HA is turned on, but unable to fail over.  has info="
                         << ha_persisted_info_->is_enabled() << " resolved=" << ha_persisted_info_->is_resolved());
        }
      }

      conn_ = InitializeConnection();
      conn_->PreEnqueueRequests(pendingRequests);

      if (head_action->delayMillis > 0) {
        // Delay the reconnect; hold only a weak_ptr so a shutdown can still
        // destroy the connection while the timer is pending.
        auto weak_conn = std::weak_ptr<RpcConnection>(conn_);
        retry_timer.expires_from_now(
            std::chrono::milliseconds(head_action->delayMillis));
        retry_timer.async_wait([this, weak_conn](asio::error_code ec) {
          auto strong_conn = weak_conn.lock();
          if ((!ec) && (strong_conn)) {
            strong_conn->ConnectAndFlush(last_endpoints_);
          }
        });
      } else {
        conn_->ConnectAndFlush(last_endpoints_);
      }
    } else {
      // We have an existing connection (which might be closed; we don't know
      // until we hold the connection local) and should just add the new requests
      conn_->AsyncRpc(pendingRequests);
    }
  }
}

void RpcEngine::SetFsEventCallback(fs_event_callback callback) {
  event_handlers_->set_fs_callback(callback);
}

} // namespace hdfs