rpc_connection.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef LIB_RPC_RPC_CONNECTION_H_
  19. #define LIB_RPC_RPC_CONNECTION_H_
  20. #include "rpc_engine.h"
  21. #include "common/auth_info.h"
  22. #include "common/logging.h"
  23. #include "common/util.h"
  24. #include "common/libhdfs_events_impl.h"
  25. #include "sasl_protocol.h"
  26. #include <asio/connect.hpp>
  27. #include <asio/read.hpp>
  28. #include <asio/write.hpp>
  29. #include <system_error>
  30. namespace hdfs {
// Concrete RpcConnection bound to a specific transport type.  NextLayer is
// expected to be an asio stream (e.g. asio::ip::tcp::socket) providing
// async_connect and satisfying asio's AsyncRead/AsyncWrite stream concepts.
template <class NextLayer>
class RpcConnectionImpl : public RpcConnection {
public:
  RpcConnectionImpl(RpcEngine *engine);
  virtual ~RpcConnectionImpl() override;

  // Queue a Request that fires `handler` once the connection is usable (or
  // has failed), then kick off ConnectAndFlush against `server`.
  virtual void Connect(const std::vector<::asio::ip::tcp::endpoint> &server,
                       const AuthInfo & auth_info,
                       RpcCallback &handler);
  virtual void ConnectAndFlush(
      const std::vector<::asio::ip::tcp::endpoint> &server) override;
  virtual void SendHandshake(RpcCallback &handler) override;
  virtual void SendContext(RpcCallback &handler) override;
  virtual void Disconnect() override;
  virtual void OnSendCompleted(const ::asio::error_code &ec,
                               size_t transferred) override;
  virtual void OnRecvCompleted(const ::asio::error_code &ec,
                               size_t transferred) override;
  virtual void FlushPendingRequests() override;

  // Direct access to the underlying transport layer.
  NextLayer &next_layer() { return next_layer_; }

  // Test hook: force the connection state without doing any network I/O.
  void TEST_set_connected(bool connected) { connected_ = connected ? kConnected : kNotYetConnected; }

private:
  const Options options_;
  // Endpoints not yet tried; ConnectComplete pops these one at a time on
  // connect failure until one succeeds or the list is exhausted.
  std::vector<::asio::ip::tcp::endpoint> additional_endpoints_;
  NextLayer next_layer_;

  // Completion handler for async_connect attempts.
  void ConnectComplete(const ::asio::error_code &ec);

  // Hide default ctors.
  RpcConnectionImpl();
  RpcConnectionImpl(const RpcConnectionImpl &other);
};
// Construct an unconnected RpcConnection.  Options are snapshotted from the
// engine and the transport is created on the engine's io_service so all
// async callbacks run on the engine's event loop.
template <class NextLayer>
RpcConnectionImpl<NextLayer>::RpcConnectionImpl(RpcEngine *engine)
    : RpcConnection(engine),
      options_(engine->options()),
      next_layer_(engine->io_service()) {
  LOG_TRACE(kRPC, << "RpcConnectionImpl::RpcConnectionImpl called &" << (void*)this);
}
  67. template <class NextLayer>
  68. RpcConnectionImpl<NextLayer>::~RpcConnectionImpl() {
  69. LOG_DEBUG(kRPC, << "RpcConnectionImpl::~RpcConnectionImpl called &" << (void*)this);
  70. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  71. if (pending_requests_.size() > 0)
  72. LOG_WARN(kRPC, << "RpcConnectionImpl::~RpcConnectionImpl called with items in the pending queue");
  73. if (requests_on_fly_.size() > 0)
  74. LOG_WARN(kRPC, << "RpcConnectionImpl::~RpcConnectionImpl called with items in the requests_on_fly queue");
  75. }
  76. template <class NextLayer>
  77. void RpcConnectionImpl<NextLayer>::Connect(
  78. const std::vector<::asio::ip::tcp::endpoint> &server,
  79. const AuthInfo & auth_info,
  80. RpcCallback &handler) {
  81. LOG_TRACE(kRPC, << "RpcConnectionImpl::Connect called");
  82. this->auth_info_ = auth_info;
  83. auto connectionSuccessfulReq = std::make_shared<Request>(
  84. engine_, [handler](::google::protobuf::io::CodedInputStream *is,
  85. const Status &status) {
  86. (void)is;
  87. handler(status);
  88. });
  89. pending_requests_.push_back(connectionSuccessfulReq);
  90. this->ConnectAndFlush(server); // need "this" so compiler can infer type of CAF
  91. }
// Begin connecting to the first endpoint (keeping the rest as fallbacks), or
// just flush queued requests if already connected.  No-op with a warning if
// a connect is already in flight or the connection is torn down.
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::ConnectAndFlush(
    const std::vector<::asio::ip::tcp::endpoint> &server) {
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);

  if (server.empty()) {
    Status s = Status::InvalidArgument("No endpoints provided");
    CommsError(s);
    return;
  }

  // Already connected: nothing to establish, just push queued requests out.
  if (connected_ == kConnected) {
    FlushPendingRequests();
    return;
  }

  // Any state other than kNotYetConnected means a connect is in progress or
  // the connection is dead; don't start a second attempt.
  if (connected_ != kNotYetConnected) {
    LOG_WARN(kRPC, << "RpcConnectionImpl::ConnectAndFlush called while connected=" << ToString(connected_));
    return;
  }
  connected_ = kConnecting;

  // Take the first endpoint, but remember the alternatives for later
  additional_endpoints_ = server;
  ::asio::ip::tcp::endpoint first_endpoint = additional_endpoints_.front();
  additional_endpoints_.erase(additional_endpoints_.begin());

  // shared_this keeps this object alive until the async callback runs.
  auto shared_this = shared_from_this();
  next_layer_.async_connect(first_endpoint, [shared_this, this](const ::asio::error_code &ec) {
    ConnectComplete(ec);
  });
}
// Completion handler for async_connect.  On success, start reading and send
// the protocol handshake; on failure, close the half-open socket and retry
// the next remembered endpoint, reporting CommsError only once all endpoints
// are exhausted.
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::ConnectComplete(const ::asio::error_code &ec) {
  auto shared_this = RpcConnectionImpl<NextLayer>::shared_from_this();
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  LOG_TRACE(kRPC, << "RpcConnectionImpl::ConnectComplete called");

  Status status = ToStatus(ec);

  // Notify observers of the connect attempt; debug builds let the handler
  // inject a failure status for testing.
  if(event_handlers_) {
    event_response event_resp = event_handlers_->call(FS_NN_CONNECT_EVENT, cluster_name_.c_str(), 0);
#ifndef NDEBUG
    if (event_resp.response() == event_response::kTest_Error) {
      status = event_resp.status();
    }
#endif
  }

  if (status.ok()) {
    StartReading();
    SendHandshake([shared_this, this](const Status & s) {
      HandshakeComplete(s);
    });
  } else {
    // Close the failed socket before retrying; log but otherwise ignore
    // errors from the close itself.
    std::string err = SafeDisconnect(get_asio_socket_ptr(&next_layer_));
    if(!err.empty()) {
      LOG_INFO(kRPC, << "Rpc connection failed to connect to endpoint, error closing connection: " << err);
    }

    if (!additional_endpoints_.empty()) {
      // If we have additional endpoints, keep trying until we either run out or
      // hit one
      ::asio::ip::tcp::endpoint next_endpoint = additional_endpoints_.front();
      additional_endpoints_.erase(additional_endpoints_.begin());

      next_layer_.async_connect(next_endpoint, [shared_this, this](const ::asio::error_code &ec) {
        ConnectComplete(ec);
      });
    } else {
      CommsError(status);
    }
  }
}
  156. template <class NextLayer>
  157. void RpcConnectionImpl<NextLayer>::SendHandshake(RpcCallback &handler) {
  158. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  159. LOG_TRACE(kRPC, << "RpcConnectionImpl::SendHandshake called");
  160. auto shared_this = shared_from_this();
  161. auto handshake_packet = PrepareHandshakePacket();
  162. ::asio::async_write(next_layer_, asio::buffer(*handshake_packet),
  163. [handshake_packet, handler, shared_this, this](
  164. const ::asio::error_code &ec, size_t) {
  165. Status status = ToStatus(ec);
  166. handler(status);
  167. });
  168. }
  169. template <class NextLayer>
  170. void RpcConnectionImpl<NextLayer>::SendContext(RpcCallback &handler) {
  171. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  172. LOG_TRACE(kRPC, << "RpcConnectionImpl::SendContext called");
  173. auto shared_this = shared_from_this();
  174. auto context_packet = PrepareContextPacket();
  175. ::asio::async_write(next_layer_, asio::buffer(*context_packet),
  176. [context_packet, handler, shared_this, this](
  177. const ::asio::error_code &ec, size_t) {
  178. Status status = ToStatus(ec);
  179. handler(status);
  180. });
  181. }
  182. template <class NextLayer>
  183. void RpcConnectionImpl<NextLayer>::OnSendCompleted(const ::asio::error_code &ec,
  184. size_t) {
  185. using std::placeholders::_1;
  186. using std::placeholders::_2;
  187. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  188. LOG_TRACE(kRPC, << "RpcConnectionImpl::OnSendCompleted called");
  189. request_over_the_wire_.reset();
  190. if (ec) {
  191. LOG_WARN(kRPC, << "Network error during RPC write: " << ec.message());
  192. CommsError(ToStatus(ec));
  193. return;
  194. }
  195. FlushPendingRequests();
  196. }
// Send the next queued request, if any.  At most one request is on the wire
// at a time (request_over_the_wire_); which queue is drained depends on the
// connection state: auth_requests_ while authenticating, pending_requests_
// once connected.  Caller must hold connection_state_lock_.
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::FlushPendingRequests() {
  using namespace ::std::placeholders;

  // Lock should be held
  assert(lock_held(connection_state_lock_));
  LOG_TRACE(kRPC, << "RpcConnectionImpl::FlushPendingRequests called");

  // Don't send if we don't need to
  if (request_over_the_wire_) {
    return;
  }

  std::shared_ptr<Request> req;
  switch (connected_) {
  case kNotYetConnected:
    return;
  case kConnecting:
    return;
  case kAuthenticating:
    // Only SASL/auth traffic may flow during authentication.
    if (auth_requests_.empty()) {
      return;
    }
    req = auth_requests_.front();
    auth_requests_.erase(auth_requests_.begin());
    break;
  case kConnected:
    if (pending_requests_.empty()) {
      return;
    }
    req = pending_requests_.front();
    pending_requests_.erase(pending_requests_.begin());
    break;
  case kDisconnected:
    LOG_DEBUG(kRPC, << "RpcConnectionImpl::FlushPendingRequests attempted to flush a " << ToString(connected_) << " connection");
    return;
  default:
    LOG_DEBUG(kRPC, << "RpcConnectionImpl::FlushPendingRequests invalid state: " << ToString(connected_));
    return;
  }

  // The timeout lambda holds weak_ptrs so the timer doesn't keep the
  // connection or request alive; the write lambda below holds a strong ref.
  std::shared_ptr<RpcConnection> shared_this = shared_from_this();
  auto weak_this = std::weak_ptr<RpcConnection>(shared_this);
  auto weak_req = std::weak_ptr<Request>(req);

  std::shared_ptr<std::string> payload = std::make_shared<std::string>();
  req->GetPacket(payload.get());
  if (!payload->empty()) {
    assert(requests_on_fly_.find(req->call_id()) == requests_on_fly_.end());
    requests_on_fly_[req->call_id()] = req;
    request_over_the_wire_ = req;

    // Arm the per-request timeout before issuing the write.
    req->timer().expires_from_now(
        std::chrono::milliseconds(options_.rpc_timeout));
    req->timer().async_wait([weak_this, weak_req, this](const ::asio::error_code &ec) {
      auto timeout_this = weak_this.lock();
      auto timeout_req = weak_req.lock();
      if (timeout_this && timeout_req)
        this->HandleRpcTimeout(timeout_req, ec);
    });

    // payload is captured so the buffer outlives the async write.
    asio::async_write(next_layer_, asio::buffer(*payload),
                      [shared_this, this, payload](const ::asio::error_code &ec,
                                                   size_t size) {
                        OnSendCompleted(ec, size);
                      });
  } else { // Nothing to send for this request, inform the handler immediately
    io_service().post(
        // Never hold locks when calling a callback
        [req]() { req->OnResponseArrived(nullptr, Status::OK()); }
    );

    // Reschedule to flush the next one
    AsyncFlushPendingRequests();
  }
}
// Read-side state machine, re-entered as each async_read completes:
// kReadLength reads the 4-byte length prefix, kReadContent reads the
// payload, kParseResponse hands the finished Response to HandleRpcResponse
// and re-arms the next read via StartReading().
template <class NextLayer>
void RpcConnectionImpl<NextLayer>::OnRecvCompleted(const ::asio::error_code &original_ec,
                                                    size_t) {
  using std::placeholders::_1;
  using std::placeholders::_2;
  std::lock_guard<std::mutex> state_lock(connection_state_lock_);

  ::asio::error_code my_ec(original_ec);

  LOG_TRACE(kRPC, << "RpcConnectionImpl::OnRecvCompleted called");

  std::shared_ptr<RpcConnection> shared_this = shared_from_this();

  // Notify observers of the read; debug builds let the event handler inject
  // a simulated network failure.
  if(event_handlers_) {
    event_response event_resp = event_handlers_->call(FS_NN_READ_EVENT, cluster_name_.c_str(), 0);
#ifndef NDEBUG
    if (event_resp.response() == event_response::kTest_Error) {
      my_ec = std::make_error_code(std::errc::network_down);
    }
#endif
  }

  switch (my_ec.value()) {
    case 0:
      // No errors
      break;
    case asio::error::operation_aborted:
      // The event loop has been shut down. Ignore the error.
      return;
    default:
      LOG_WARN(kRPC, << "Network error during RPC read: " << my_ec.message());
      CommsError(ToStatus(my_ec));
      return;
  }

  if (!current_response_state_) { /* start a new one */
    current_response_state_ = std::make_shared<Response>();
  }

  if (current_response_state_->state_ == Response::kReadLength) {
    // state_ is advanced *before* the read is issued; the next invocation of
    // this handler sees the state that read was serving.
    current_response_state_->state_ = Response::kReadContent;
    auto buf = ::asio::buffer(reinterpret_cast<char *>(&current_response_state_->length_),
                              sizeof(current_response_state_->length_));
    asio::async_read(
        next_layer_, buf,
        [shared_this, this](const ::asio::error_code &ec, size_t size) {
          OnRecvCompleted(ec, size);
        });
  } else if (current_response_state_->state_ == Response::kReadContent) {
    current_response_state_->state_ = Response::kParseResponse;
    // The length prefix arrives in network byte order.
    current_response_state_->length_ = ntohl(current_response_state_->length_);
    current_response_state_->data_.resize(current_response_state_->length_);
    asio::async_read(
        next_layer_, ::asio::buffer(current_response_state_->data_),
        [shared_this, this](const ::asio::error_code &ec, size_t size) {
          OnRecvCompleted(ec, size);
        });
  } else if (current_response_state_->state_ == Response::kParseResponse) {
    HandleRpcResponse(current_response_state_);
    current_response_state_ = nullptr;
    StartReading();
  }
}
  321. template <class NextLayer>
  322. void RpcConnectionImpl<NextLayer>::Disconnect() {
  323. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  324. LOG_INFO(kRPC, << "RpcConnectionImpl::Disconnect called");
  325. request_over_the_wire_.reset();
  326. if (connected_ == kConnecting || connected_ == kAuthenticating || connected_ == kConnected) {
  327. // Don't print out errors, we were expecting a disconnect here
  328. SafeDisconnect(get_asio_socket_ptr(&next_layer_));
  329. }
  330. connected_ = kDisconnected;
  331. }
  332. }
  333. #endif