rpc_connection.h

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef LIB_RPC_RPC_CONNECTION_H_
#define LIB_RPC_RPC_CONNECTION_H_

#include "rpc_engine.h"
#include "common/auth_info.h"
#include "common/logging.h"
#include "common/util.h"
#include "common/libhdfs_events_impl.h"
#include "sasl_protocol.h"

#include <asio/connect.hpp>
#include <asio/read.hpp>
#include <asio/write.hpp>

namespace hdfs {

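// RpcConnectionImpl is the concrete, templated half of RpcConnection. The
// NextLayer parameter is the asio stream the connection talks through; it is
// constructed from the engine's io_service and driven with async_connect,
// async_read, and async_write below, which allows tests to substitute a mock
// stream via next_layer() and TEST_set_connected(). State transitions are
// guarded by connection_state_lock_ (see the lock_held asserts). A plain TCP
// instantiation would presumably look like the following (illustrative alias
// only; it is not defined in this header):
//
//   using TcpRpcConnection = RpcConnectionImpl<::asio::ip::tcp::socket>;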
template <class NextLayer>
class RpcConnectionImpl : public RpcConnection {
 public:
  RpcConnectionImpl(RpcEngine *engine);
  virtual ~RpcConnectionImpl() override;

  virtual void Connect(const std::vector<::asio::ip::tcp::endpoint> &server,
                       const AuthInfo &auth_info,
                       RpcCallback &handler);
  virtual void ConnectAndFlush(
      const std::vector<::asio::ip::tcp::endpoint> &server) override;
  virtual void SendHandshake(RpcCallback &handler) override;
  virtual void SendContext(RpcCallback &handler) override;
  virtual void Disconnect() override;
  virtual void OnSendCompleted(const ::asio::error_code &ec,
                               size_t transferred) override;
  virtual void OnRecvCompleted(const ::asio::error_code &ec,
                               size_t transferred) override;
  virtual void FlushPendingRequests() override;

  NextLayer &next_layer() { return next_layer_; }

  void TEST_set_connected(bool connected) { connected_ = connected ? kConnected : kNotYetConnected; }

 private:
  const Options options_;
  ::asio::ip::tcp::endpoint current_endpoint_;
  std::vector<::asio::ip::tcp::endpoint> additional_endpoints_;
  NextLayer next_layer_;
  ::asio::deadline_timer connect_timer_;

  void ConnectComplete(const ::asio::error_code &ec, const ::asio::ip::tcp::endpoint &remote);
};

template <class NextLayer>
RpcConnectionImpl<NextLayer>::RpcConnectionImpl(RpcEngine *engine)
    : RpcConnection(engine),
      options_(engine->options()),
      next_layer_(engine->io_service()),
      connect_timer_(engine->io_service()) {
  LOG_TRACE(kRPC, << "RpcConnectionImpl::RpcConnectionImpl called");
}

template <class NextLayer>
RpcConnectionImpl<NextLayer>::~RpcConnectionImpl() {
  LOG_DEBUG(kRPC, << "RpcConnectionImpl::~RpcConnectionImpl called &" << (void*)this);

  std::lock_guard<std::mutex> state_lock(connection_state_lock_);

  if (pending_requests_.size() > 0)
    LOG_WARN(kRPC, << "RpcConnectionImpl::~RpcConnectionImpl called with items in the pending queue");
  if (requests_on_fly_.size() > 0)
    LOG_WARN(kRPC, << "RpcConnectionImpl::~RpcConnectionImpl called with items in the requests_on_fly queue");
}
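
// Connect() wraps the caller's callback in a sentinel Request whose only job
// is to invoke the handler with the final status; it is queued on
// pending_requests_ so the caller is notified once the connection has been
// established and the queue flushed.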
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::Connect(
    const std::vector<::asio::ip::tcp::endpoint> &server,
    const AuthInfo &auth_info,
    RpcCallback &handler) {
  LOG_TRACE(kRPC, << "RpcConnectionImpl::Connect called");

  this->auth_info_ = auth_info;

  auto connectionSuccessfulReq = std::make_shared<Request>(
      engine_, [handler](::google::protobuf::io::CodedInputStream *is,
                         const Status &status) {
        (void)is;
        handler(status);
      });
  pending_requests_.push_back(connectionSuccessfulReq);
  this->ConnectAndFlush(server);  // need "this" so compiler can infer type of CAF
}
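
// ConnectAndFlush takes the first endpoint and remembers the rest as failover
// candidates, then races an async_connect against a deadline timer armed with
// options_.rpc_connect_timeout; whichever completes first drives
// ConnectComplete (the timer path reports host_unreachable).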
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::ConnectAndFlush(
    const std::vector<::asio::ip::tcp::endpoint> &server) {
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);

  if (server.empty()) {
    Status s = Status::InvalidArgument("No endpoints provided");
    CommsError(s);
    return;
  }

  if (connected_ == kConnected) {
    FlushPendingRequests();
    return;
  }
  if (connected_ != kNotYetConnected) {
    LOG_WARN(kRPC, << "RpcConnectionImpl::ConnectAndFlush called while connected=" << ToString(connected_));
    return;
  }
  connected_ = kConnecting;

  // Take the first endpoint, but remember the alternatives for later
  additional_endpoints_ = server;
  ::asio::ip::tcp::endpoint first_endpoint = additional_endpoints_.front();
  additional_endpoints_.erase(additional_endpoints_.begin());
  current_endpoint_ = first_endpoint;

  auto shared_this = shared_from_this();
  next_layer_.async_connect(first_endpoint, [shared_this, this, first_endpoint](const ::asio::error_code &ec) {
    ConnectComplete(ec, first_endpoint);
  });

  // Prompt the timer to timeout
  auto weak_this = std::weak_ptr<RpcConnection>(shared_this);
  connect_timer_.expires_from_now(
      std::chrono::milliseconds(options_.rpc_connect_timeout));
  connect_timer_.async_wait([shared_this, this, first_endpoint](const ::asio::error_code &ec) {
    if (ec)
      ConnectComplete(ec, first_endpoint);
    else
      ConnectComplete(make_error_code(asio::error::host_unreachable), first_endpoint);
  });
}
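
// ConnectComplete is invoked by both the connect handler and the timeout
// timer. It ignores stale completions (wrong endpoint or wrong state), lets
// the event-handler hook inject test errors in debug builds, and on failure
// closes the socket and retries the next remembered endpoint before finally
// surfacing a CommsError. On success it starts the read loop and begins the
// handshake.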
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::ConnectComplete(const ::asio::error_code &ec, const ::asio::ip::tcp::endpoint &remote) {
  auto shared_this = RpcConnectionImpl<NextLayer>::shared_from_this();
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  connect_timer_.cancel();

  LOG_TRACE(kRPC, << "RpcConnectionImpl::ConnectComplete called");

  // Could be an old async connect returning a result after we've moved on
  if (remote != current_endpoint_) {
    LOG_DEBUG(kRPC, << "Got ConnectComplete for " << remote << " but current_endpoint_ is " << current_endpoint_);
    return;
  }
  if (connected_ != kConnecting) {
    LOG_DEBUG(kRPC, << "Got ConnectComplete but current state is " << connected_);
    return;
  }

  Status status = ToStatus(ec);
  if (event_handlers_) {
    auto event_resp = event_handlers_->call(FS_NN_CONNECT_EVENT, cluster_name_.c_str(), 0);
#ifndef NDEBUG
    if (event_resp.response() == event_response::kTest_Error) {
      status = event_resp.status();
    }
#endif
  }

  if (status.ok()) {
    StartReading();
    SendHandshake([shared_this, this](const Status &s) {
      HandshakeComplete(s);
    });
  } else {
    LOG_DEBUG(kRPC, << "Rpc connection failed; err=" << status.ToString());
    std::string err = SafeDisconnect(get_asio_socket_ptr(&next_layer_));
    if (!err.empty()) {
      LOG_INFO(kRPC, << "Rpc connection failed to connect to endpoint, error closing connection: " << err);
    }

    if (!additional_endpoints_.empty()) {
      // If we have additional endpoints, keep trying until we either run out
      // or hit one that works
      ::asio::ip::tcp::endpoint next_endpoint = additional_endpoints_.front();
      additional_endpoints_.erase(additional_endpoints_.begin());
      current_endpoint_ = next_endpoint;

      next_layer_.async_connect(next_endpoint, [shared_this, this, next_endpoint](const ::asio::error_code &ec) {
        ConnectComplete(ec, next_endpoint);
      });
      connect_timer_.expires_from_now(
          std::chrono::milliseconds(options_.rpc_connect_timeout));
      connect_timer_.async_wait([shared_this, this, next_endpoint](const ::asio::error_code &ec) {
        if (ec)
          ConnectComplete(ec, next_endpoint);
        else
          ConnectComplete(make_error_code(asio::error::host_unreachable), next_endpoint);
      });
    } else {
      CommsError(status);
    }
  }
}
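
// SendHandshake and SendContext both capture the serialized packet (a
// shared_ptr) in the async_write completion handler so the underlying buffer
// stays alive until the write finishes.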
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::SendHandshake(RpcCallback &handler) {
  assert(lock_held(connection_state_lock_));  // Must be holding lock before calling

  LOG_TRACE(kRPC, << "RpcConnectionImpl::SendHandshake called");

  connected_ = kHandshaking;

  auto shared_this = shared_from_this();
  auto handshake_packet = PrepareHandshakePacket();
  ::asio::async_write(next_layer_, asio::buffer(*handshake_packet),
                      [handshake_packet, handler, shared_this, this](
                          const ::asio::error_code &ec, size_t) {
                        Status status = ToStatus(ec);
                        handler(status);
                      });
}

template <class NextLayer>
void RpcConnectionImpl<NextLayer>::SendContext(RpcCallback &handler) {
  assert(lock_held(connection_state_lock_));  // Must be holding lock before calling

  LOG_TRACE(kRPC, << "RpcConnectionImpl::SendContext called");

  auto shared_this = shared_from_this();
  auto context_packet = PrepareContextPacket();
  ::asio::async_write(next_layer_, asio::buffer(*context_packet),
                      [context_packet, handler, shared_this, this](
                          const ::asio::error_code &ec, size_t) {
                        Status status = ToStatus(ec);
                        handler(status);
                      });
}

template <class NextLayer>
void RpcConnectionImpl<NextLayer>::OnSendCompleted(const ::asio::error_code &ec,
                                                   size_t) {
  using std::placeholders::_1;
  using std::placeholders::_2;
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);

  LOG_TRACE(kRPC, << "RpcConnectionImpl::OnSendCompleted called");

  request_over_the_wire_.reset();
  if (ec) {
    LOG_WARN(kRPC, << "Network error during RPC write: " << ec.message());
    CommsError(ToStatus(ec));
    return;
  }

  FlushPendingRequests();
}
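
// FlushPendingRequests sends at most one request at a time: if something is
// already over the wire it returns immediately, otherwise it picks the next
// request from the queue appropriate to the connection state (auth_requests_
// while authenticating, pending_requests_ once connected), arms a per-request
// timer with options_.rpc_timeout, and writes the packet. Requests that
// serialize to an empty payload are completed immediately off the lock via
// io_service().post().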
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::FlushPendingRequests() {
  using namespace ::std::placeholders;

  // Lock should be held
  assert(lock_held(connection_state_lock_));

  LOG_TRACE(kRPC, << "RpcConnectionImpl::FlushPendingRequests called");

  // Don't send if we don't need to
  if (request_over_the_wire_) {
    return;
  }

  std::shared_ptr<Request> req;
  switch (connected_) {
  case kNotYetConnected:
    return;
  case kConnecting:
    return;
  case kHandshaking:
    return;
  case kAuthenticating:
    if (auth_requests_.empty()) {
      return;
    }
    req = auth_requests_.front();
    auth_requests_.erase(auth_requests_.begin());
    break;
  case kConnected:
    if (pending_requests_.empty()) {
      return;
    }
    req = pending_requests_.front();
    pending_requests_.erase(pending_requests_.begin());
    break;
  case kDisconnected:
    LOG_DEBUG(kRPC, << "RpcConnectionImpl::FlushPendingRequests attempted to flush a " << ToString(connected_) << " connection");
    return;
  default:
    LOG_DEBUG(kRPC, << "RpcConnectionImpl::FlushPendingRequests invalid state: " << ToString(connected_));
    return;
  }

  std::shared_ptr<RpcConnection> shared_this = shared_from_this();
  auto weak_this = std::weak_ptr<RpcConnection>(shared_this);
  auto weak_req = std::weak_ptr<Request>(req);

  std::shared_ptr<std::string> payload = std::make_shared<std::string>();
  req->GetPacket(payload.get());
  if (!payload->empty()) {
    assert(requests_on_fly_.find(req->call_id()) == requests_on_fly_.end());
    requests_on_fly_[req->call_id()] = req;
    request_over_the_wire_ = req;

    req->timer().expires_from_now(
        std::chrono::milliseconds(options_.rpc_timeout));
    req->timer().async_wait([weak_this, weak_req, this](const ::asio::error_code &ec) {
      auto timeout_this = weak_this.lock();
      auto timeout_req = weak_req.lock();
      if (timeout_this && timeout_req)
        this->HandleRpcTimeout(timeout_req, ec);
    });

    asio::async_write(next_layer_, asio::buffer(*payload),
                      [shared_this, this, payload](const ::asio::error_code &ec,
                                                   size_t size) {
                        OnSendCompleted(ec, size);
                      });
  } else {  // Nothing to send for this request, inform the handler immediately
    io_service().post(
        // Never hold locks when calling a callback
        [req]() { req->OnResponseArrived(nullptr, Status::OK()); }
    );

    // Reschedule to flush the next one
    AsyncFlushPendingRequests();
  }
}
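
// OnRecvCompleted implements a small read state machine over length-prefixed
// frames: kReadLength reads the frame length (converted from network byte
// order with ntohl), kReadContent reads that many bytes of payload, and
// kParseResponse hands the complete frame to HandleRpcResponse before
// re-arming the read.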
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::OnRecvCompleted(const ::asio::error_code &asio_ec,
                                                   size_t) {
  using std::placeholders::_1;
  using std::placeholders::_2;
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);

  LOG_TRACE(kRPC, << "RpcConnectionImpl::OnRecvCompleted called");

  std::shared_ptr<RpcConnection> shared_this = shared_from_this();

  ::asio::error_code ec = asio_ec;
  if (event_handlers_) {
    auto event_resp = event_handlers_->call(FS_NN_READ_EVENT, cluster_name_.c_str(), 0);
#ifndef NDEBUG
    if (event_resp.response() == event_response::kTest_Error) {
      ec = std::make_error_code(std::errc::network_down);
    }
#endif
  }

  switch (ec.value()) {
  case 0:
    // No errors
    break;
  case asio::error::operation_aborted:
    // The event loop has been shut down. Ignore the error.
    return;
  default:
    LOG_WARN(kRPC, << "Network error during RPC read: " << ec.message());
    CommsError(ToStatus(ec));
    return;
  }

  if (!current_response_state_) { /* start a new one */
    current_response_state_ = std::make_shared<Response>();
  }

  if (current_response_state_->state_ == Response::kReadLength) {
    current_response_state_->state_ = Response::kReadContent;
    auto buf = ::asio::buffer(reinterpret_cast<char *>(&current_response_state_->length_),
                              sizeof(current_response_state_->length_));
    asio::async_read(
        next_layer_, buf,
        [shared_this, this](const ::asio::error_code &ec, size_t size) {
          OnRecvCompleted(ec, size);
        });
  } else if (current_response_state_->state_ == Response::kReadContent) {
    current_response_state_->state_ = Response::kParseResponse;
    current_response_state_->length_ = ntohl(current_response_state_->length_);
    current_response_state_->data_.resize(current_response_state_->length_);
    asio::async_read(
        next_layer_, ::asio::buffer(current_response_state_->data_),
        [shared_this, this](const ::asio::error_code &ec, size_t size) {
          OnRecvCompleted(ec, size);
        });
  } else if (current_response_state_->state_ == Response::kParseResponse) {
    HandleRpcResponse(current_response_state_);
    current_response_state_ = nullptr;
    StartReading();
  }
}

template <class NextLayer>
void RpcConnectionImpl<NextLayer>::Disconnect() {
  assert(lock_held(connection_state_lock_));  // Must be holding lock before calling

  LOG_INFO(kRPC, << "RpcConnectionImpl::Disconnect called");

  request_over_the_wire_.reset();
  if (connected_ == kConnecting || connected_ == kHandshaking || connected_ == kAuthenticating || connected_ == kConnected) {
    // Don't print out errors, we were expecting a disconnect here
    SafeDisconnect(get_asio_socket_ptr(&next_layer_));
  }
  connected_ = kDisconnected;
}

}  // namespace hdfs

#endif