rpc_connection.cc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "rpc_engine.h"
  19. #include "sasl_protocol.h"
  20. #include "RpcHeader.pb.h"
  21. #include "ProtobufRpcEngine.pb.h"
  22. #include "IpcConnectionContext.pb.h"
  23. #include "common/logging.h"
  24. #include "common/util.h"
  25. #include <asio/read.hpp>
  26. namespace hdfs {
  27. namespace pb = ::google::protobuf;
  28. namespace pbio = ::google::protobuf::io;
  29. using namespace ::hadoop::common;
  30. using namespace ::std::placeholders;
  // Sentinel retry count used when the engine has no retry policy;
  // SetRequestHeader leaves the retryCount header field unset for this value.
  static constexpr int kNoRetry = -1;
  32. static void AddHeadersToPacket(
  33. std::string *res, std::initializer_list<const pb::MessageLite *> headers,
  34. const std::string *payload) {
  35. int len = 0;
  36. std::for_each(
  37. headers.begin(), headers.end(),
  38. [&len](const pb::MessageLite *v) { len += DelimitedPBMessageSize(v); });
  39. if (payload) {
  40. len += payload->size();
  41. }
  42. int net_len = htonl(len);
  43. res->reserve(res->size() + sizeof(net_len) + len);
  44. pbio::StringOutputStream ss(res);
  45. pbio::CodedOutputStream os(&ss);
  46. os.WriteRaw(reinterpret_cast<const char *>(&net_len), sizeof(net_len));
  47. uint8_t *buf = os.GetDirectBufferForNBytesAndAdvance(len);
  48. assert(buf);
  49. std::for_each(
  50. headers.begin(), headers.end(), [&buf](const pb::MessageLite *v) {
  51. buf = pbio::CodedOutputStream::WriteVarint32ToArray(v->ByteSize(), buf);
  52. buf = v->SerializeWithCachedSizesToArray(buf);
  53. });
  54. if (payload) {
  55. buf = os.WriteStringToArray(*payload, buf);
  56. }
  57. }
  58. static void ConstructPayload(std::string *res, const pb::MessageLite *header) {
  59. int len = DelimitedPBMessageSize(header);
  60. res->reserve(len);
  61. pbio::StringOutputStream ss(res);
  62. pbio::CodedOutputStream os(&ss);
  63. uint8_t *buf = os.GetDirectBufferForNBytesAndAdvance(len);
  64. assert(buf);
  65. buf = pbio::CodedOutputStream::WriteVarint32ToArray(header->ByteSize(), buf);
  66. buf = header->SerializeWithCachedSizesToArray(buf);
  67. }
  68. static void ConstructPayload(std::string *res, const std::string *request) {
  69. int len =
  70. pbio::CodedOutputStream::VarintSize32(request->size()) + request->size();
  71. res->reserve(len);
  72. pbio::StringOutputStream ss(res);
  73. pbio::CodedOutputStream os(&ss);
  74. uint8_t *buf = os.GetDirectBufferForNBytesAndAdvance(len);
  75. assert(buf);
  76. buf = pbio::CodedOutputStream::WriteVarint32ToArray(request->size(), buf);
  77. buf = os.WriteStringToArray(*request, buf);
  78. }
  79. static void SetRequestHeader(LockFreeRpcEngine *engine, int call_id,
  80. const std::string &method_name, int retry_count,
  81. RpcRequestHeaderProto *rpc_header,
  82. RequestHeaderProto *req_header) {
  83. rpc_header->set_rpckind(RPC_PROTOCOL_BUFFER);
  84. rpc_header->set_rpcop(RpcRequestHeaderProto::RPC_FINAL_PACKET);
  85. rpc_header->set_callid(call_id);
  86. if (retry_count != kNoRetry)
  87. rpc_header->set_retrycount(retry_count);
  88. rpc_header->set_clientid(engine->client_name());
  89. req_header->set_methodname(method_name);
  90. req_header->set_declaringclassprotocolname(engine->protocol_name());
  91. req_header->set_clientprotocolversion(engine->protocol_version());
  92. }
  93. RpcConnection::~RpcConnection() {}
  94. Request::Request(LockFreeRpcEngine *engine, const std::string &method_name, int call_id,
  95. const std::string &request, Handler &&handler)
  96. : engine_(engine),
  97. method_name_(method_name),
  98. call_id_(call_id),
  99. timer_(engine->io_service()),
  100. handler_(std::move(handler)),
  101. retry_count_(engine->retry_policy() ? 0 : kNoRetry) {
  102. ConstructPayload(&payload_, &request);
  103. }
  104. Request::Request(LockFreeRpcEngine *engine, const std::string &method_name, int call_id,
  105. const pb::MessageLite *request, Handler &&handler)
  106. : engine_(engine),
  107. method_name_(method_name),
  108. call_id_(call_id),
  109. timer_(engine->io_service()),
  110. handler_(std::move(handler)),
  111. retry_count_(engine->retry_policy() ? 0 : kNoRetry) {
  112. ConstructPayload(&payload_, request);
  113. }
  114. Request::Request(LockFreeRpcEngine *engine, Handler &&handler)
  115. : engine_(engine),
  116. call_id_(-1),
  117. timer_(engine->io_service()),
  118. handler_(std::move(handler)),
  119. retry_count_(engine->retry_policy() ? 0 : kNoRetry) {
  120. }
  121. void Request::GetPacket(std::string *res) const {
  122. if (payload_.empty())
  123. return;
  124. RpcRequestHeaderProto rpc_header;
  125. RequestHeaderProto req_header;
  126. SetRequestHeader(engine_, call_id_, method_name_, retry_count_, &rpc_header,
  127. &req_header);
  128. // SASL messages don't have a request header
  129. if (method_name_ != SASL_METHOD_NAME)
  130. AddHeadersToPacket(res, {&rpc_header, &req_header}, &payload_);
  131. else
  132. AddHeadersToPacket(res, {&rpc_header}, &payload_);
  133. }
  134. void Request::OnResponseArrived(pbio::CodedInputStream *is,
  135. const Status &status) {
  136. handler_(is, status);
  137. }
  138. RpcConnection::RpcConnection(LockFreeRpcEngine *engine)
  139. : engine_(engine),
  140. connected_(kNotYetConnected) {}
  141. ::asio::io_service &RpcConnection::io_service() {
  142. return engine_->io_service();
  143. }
  144. void RpcConnection::StartReading() {
  145. auto shared_this = shared_from_this();
  146. io_service().post([shared_this, this] () {
  147. OnRecvCompleted(::asio::error_code(), 0);
  148. });
  149. }
  150. void RpcConnection::HandshakeComplete(const Status &s) {
  151. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  152. LOG_TRACE(kRPC, << "RpcConnectionImpl::HandshakeComplete called");
  153. if (s.ok()) {
  154. if (connected_ == kConnecting) {
  155. auto shared_this = shared_from_this();
  156. connected_ = kAuthenticating;
  157. if (auth_info_.useSASL()) {
  158. #ifdef USE_SASL
  159. sasl_protocol_ = std::make_shared<SaslProtocol>(cluster_name_, auth_info_, shared_from_this());
  160. sasl_protocol_->SetEventHandlers(event_handlers_);
  161. sasl_protocol_->authenticate([shared_this, this](
  162. const Status & status, const AuthInfo & new_auth_info) {
  163. AuthComplete(status, new_auth_info); } );
  164. #else
  165. AuthComplete_locked(Status::Error("SASL is required, but no SASL library was found"), auth_info_);
  166. #endif
  167. } else {
  168. AuthComplete_locked(Status::OK(), auth_info_);
  169. }
  170. }
  171. } else {
  172. CommsError(s);
  173. };
  174. }
  175. void RpcConnection::AuthComplete(const Status &s, const AuthInfo & new_auth_info) {
  176. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  177. AuthComplete_locked(s, new_auth_info);
  178. }
  179. void RpcConnection::AuthComplete_locked(const Status &s, const AuthInfo & new_auth_info) {
  180. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  181. LOG_TRACE(kRPC, << "RpcConnectionImpl::AuthComplete called");
  182. // Free the sasl_protocol object
  183. sasl_protocol_.reset();
  184. if (s.ok()) {
  185. auth_info_ = new_auth_info;
  186. auto shared_this = shared_from_this();
  187. SendContext([shared_this, this](const Status & s) {
  188. ContextComplete(s);
  189. });
  190. } else {
  191. CommsError(s);
  192. };
  193. }
  194. void RpcConnection::ContextComplete(const Status &s) {
  195. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  196. LOG_TRACE(kRPC, << "RpcConnectionImpl::ContextComplete called");
  197. if (s.ok()) {
  198. if (connected_ == kAuthenticating) {
  199. connected_ = kConnected;
  200. }
  201. FlushPendingRequests();
  202. } else {
  203. CommsError(s);
  204. };
  205. }
  206. void RpcConnection::AsyncFlushPendingRequests() {
  207. std::shared_ptr<RpcConnection> shared_this = shared_from_this();
  208. io_service().post([shared_this, this]() {
  209. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  210. LOG_TRACE(kRPC, << "RpcConnection::AsyncFlushPendingRequests called (connected=" << ToString(connected_) << ")");
  211. if (!request_over_the_wire_) {
  212. FlushPendingRequests();
  213. }
  214. });
  215. }
  216. void RpcConnection::HandleRpcResponse(std::shared_ptr<Response> response) {
  217. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  218. response->ar.reset(new pbio::ArrayInputStream(&response->data_[0], response->data_.size()));
  219. response->in.reset(new pbio::CodedInputStream(response->ar.get()));
  220. response->in->PushLimit(response->data_.size());
  221. RpcResponseHeaderProto h;
  222. ReadDelimitedPBMessage(response->in.get(), &h);
  223. auto req = RemoveFromRunningQueue(h.callid());
  224. if (!req) {
  225. LOG_WARN(kRPC, << "RPC response with Unknown call id " << h.callid());
  226. return;
  227. }
  228. Status status;
  229. if (h.has_exceptionclassname()) {
  230. status =
  231. Status::Exception(h.exceptionclassname().c_str(), h.errormsg().c_str());
  232. }
  233. io_service().post([req, response, status]() {
  234. req->OnResponseArrived(response->in.get(), status); // Never call back while holding a lock
  235. });
  236. }
  237. void RpcConnection::HandleRpcTimeout(std::shared_ptr<Request> req,
  238. const ::asio::error_code &ec) {
  239. if (ec.value() == asio::error::operation_aborted) {
  240. return;
  241. }
  242. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  243. auto r = RemoveFromRunningQueue(req->call_id());
  244. if (!r) {
  245. // The RPC might have been finished and removed from the queue
  246. return;
  247. }
  248. Status stat = ToStatus(ec ? ec : make_error_code(::asio::error::timed_out));
  249. r->OnResponseArrived(nullptr, stat);
  250. }
  251. std::shared_ptr<std::string> RpcConnection::PrepareHandshakePacket() {
  252. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  253. /** From Client.java:
  254. *
  255. * Write the connection header - this is sent when connection is established
  256. * +----------------------------------+
  257. * | "hrpc" 4 bytes |
  258. * +----------------------------------+
  259. * | Version (1 byte) |
  260. * +----------------------------------+
  261. * | Service Class (1 byte) |
  262. * +----------------------------------+
  263. * | AuthProtocol (1 byte) |
  264. * +----------------------------------+
  265. *
  266. * AuthProtocol: 0->none, -33->SASL
  267. */
  268. char auth_protocol = auth_info_.useSASL() ? -33 : 0;
  269. const char handshake_header[] = {'h', 'r', 'p', 'c',
  270. RpcEngine::kRpcVersion, 0, auth_protocol};
  271. auto res =
  272. std::make_shared<std::string>(handshake_header, sizeof(handshake_header));
  273. return res;
  274. }
  275. std::shared_ptr<std::string> RpcConnection::PrepareContextPacket() {
  276. // This needs to be send after the SASL handshake, and
  277. // after the SASL handshake (if any)
  278. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  279. auto res = std::make_shared<std::string>();
  280. RpcRequestHeaderProto h;
  281. h.set_rpckind(RPC_PROTOCOL_BUFFER);
  282. h.set_rpcop(RpcRequestHeaderProto::RPC_FINAL_PACKET);
  283. h.set_callid(RpcEngine::kCallIdConnectionContext);
  284. h.set_clientid(engine_->client_name());
  285. IpcConnectionContextProto handshake;
  286. handshake.set_protocol(engine_->protocol_name());
  287. const std::string & user_name = auth_info_.getUser();
  288. if (!user_name.empty()) {
  289. *handshake.mutable_userinfo()->mutable_effectiveuser() = user_name;
  290. }
  291. AddHeadersToPacket(res.get(), {&h, &handshake}, nullptr);
  292. return res;
  293. }
  294. void RpcConnection::AsyncRpc(
  295. const std::string &method_name, const ::google::protobuf::MessageLite *req,
  296. std::shared_ptr<::google::protobuf::MessageLite> resp,
  297. const RpcCallback &handler) {
  298. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  299. AsyncRpc_locked(method_name, req, resp, handler);
  300. }
  301. void RpcConnection::AsyncRpc_locked(
  302. const std::string &method_name, const ::google::protobuf::MessageLite *req,
  303. std::shared_ptr<::google::protobuf::MessageLite> resp,
  304. const RpcCallback &handler) {
  305. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  306. auto wrapped_handler =
  307. [resp, handler](pbio::CodedInputStream *is, const Status &status) {
  308. if (status.ok()) {
  309. if (is) { // Connect messages will not have an is
  310. ReadDelimitedPBMessage(is, resp.get());
  311. }
  312. }
  313. handler(status);
  314. };
  315. int call_id = (method_name != SASL_METHOD_NAME ? engine_->NextCallId() : RpcEngine::kCallIdSasl);
  316. auto r = std::make_shared<Request>(engine_, method_name, call_id, req,
  317. std::move(wrapped_handler));
  318. auto r_vector = std::vector<std::shared_ptr<Request> > (1, r);
  319. SendRpcRequests(r_vector);
  320. }
  321. void RpcConnection::AsyncRpc(const std::vector<std::shared_ptr<Request> > & requests) {
  322. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  323. SendRpcRequests(requests);
  324. }
  325. void RpcConnection::SendRpcRequests(const std::vector<std::shared_ptr<Request> > & requests) {
  326. LOG_TRACE(kRPC, << "RpcConnection::SendRpcRequests[] called; connected=" << ToString(connected_));
  327. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  328. if (connected_ == kDisconnected) {
  329. // Oops. The connection failed _just_ before the engine got a chance
  330. // to send it. Register it as a failure
  331. Status status = Status::ResourceUnavailable("RpcConnection closed before send.");
  332. engine_->AsyncRpcCommsError(status, shared_from_this(), requests);
  333. } else {
  334. pending_requests_.reserve(pending_requests_.size() + requests.size());
  335. for (auto r: requests) {
  336. if (r->method_name() != SASL_METHOD_NAME)
  337. pending_requests_.push_back(r);
  338. else
  339. auth_requests_.push_back(r);
  340. }
  341. if (connected_ == kConnected || connected_ == kAuthenticating) { // Dont flush if we're waiting or handshaking
  342. FlushPendingRequests();
  343. }
  344. }
  345. }
  346. void RpcConnection::PreEnqueueRequests(
  347. std::vector<std::shared_ptr<Request>> requests) {
  348. // Public method - acquire lock
  349. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  350. LOG_DEBUG(kRPC, << "RpcConnection::PreEnqueueRequests called");
  351. assert(connected_ == kNotYetConnected);
  352. pending_requests_.insert(pending_requests_.end(), requests.begin(),
  353. requests.end());
  354. // Don't start sending yet; will flush when connected
  355. }
  356. void RpcConnection::SetEventHandlers(std::shared_ptr<LibhdfsEvents> event_handlers) {
  357. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  358. event_handlers_ = event_handlers;
  359. if (sasl_protocol_) {
  360. sasl_protocol_->SetEventHandlers(event_handlers);
  361. }
  362. }
  363. void RpcConnection::SetClusterName(std::string cluster_name) {
  364. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  365. cluster_name_ = cluster_name;
  366. }
  367. void RpcConnection::CommsError(const Status &status) {
  368. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  369. Disconnect();
  370. // Anything that has been queued to the connection (on the fly or pending)
  371. // will get dinged for a retry
  372. std::vector<std::shared_ptr<Request>> requestsToReturn;
  373. std::transform(requests_on_fly_.begin(), requests_on_fly_.end(),
  374. std::back_inserter(requestsToReturn),
  375. std::bind(&RequestOnFlyMap::value_type::second, _1));
  376. requests_on_fly_.clear();
  377. requestsToReturn.insert(requestsToReturn.end(),
  378. std::make_move_iterator(pending_requests_.begin()),
  379. std::make_move_iterator(pending_requests_.end()));
  380. pending_requests_.clear();
  381. engine_->AsyncRpcCommsError(status, shared_from_this(), requestsToReturn);
  382. }
  383. void RpcConnection::ClearAndDisconnect(const ::asio::error_code &ec) {
  384. Disconnect();
  385. std::vector<std::shared_ptr<Request>> requests;
  386. std::transform(requests_on_fly_.begin(), requests_on_fly_.end(),
  387. std::back_inserter(requests),
  388. std::bind(&RequestOnFlyMap::value_type::second, _1));
  389. requests_on_fly_.clear();
  390. requests.insert(requests.end(),
  391. std::make_move_iterator(pending_requests_.begin()),
  392. std::make_move_iterator(pending_requests_.end()));
  393. pending_requests_.clear();
  394. for (const auto &req : requests) {
  395. req->OnResponseArrived(nullptr, ToStatus(ec));
  396. }
  397. }
  398. std::shared_ptr<Request> RpcConnection::RemoveFromRunningQueue(int call_id) {
  399. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  400. auto it = requests_on_fly_.find(call_id);
  401. if (it == requests_on_fly_.end()) {
  402. return std::shared_ptr<Request>();
  403. }
  404. auto req = it->second;
  405. requests_on_fly_.erase(it);
  406. return req;
  407. }
  408. std::string RpcConnection::ToString(ConnectedState connected) {
  409. switch(connected) {
  410. case kNotYetConnected: return "NotYetConnected";
  411. case kConnecting: return "Connecting";
  412. case kAuthenticating: return "Authenticating";
  413. case kConnected: return "Connected";
  414. case kDisconnected: return "Disconnected";
  415. default: return "Invalid ConnectedState";
  416. }
  417. }
  418. }