rpc_connection.cc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "rpc_engine.h"
  19. #include "sasl_protocol.h"
  20. #include "RpcHeader.pb.h"
  21. #include "ProtobufRpcEngine.pb.h"
  22. #include "IpcConnectionContext.pb.h"
  23. #include "common/logging.h"
  24. #include "common/util.h"
  25. #include <asio/read.hpp>
  26. namespace hdfs {
  27. namespace pb = ::google::protobuf;
  28. namespace pbio = ::google::protobuf::io;
  29. using namespace ::hadoop::common;
  30. using namespace ::std::placeholders;
  // Sentinel stored in Request::retry_count_ when the engine has no retry
  // policy; SetRequestHeader leaves the retry-count field unset for this value.
  static constexpr int kNoRetry = -1;
  32. static void AddHeadersToPacket(
  33. std::string *res, std::initializer_list<const pb::MessageLite *> headers,
  34. const std::string *payload) {
  35. int len = 0;
  36. std::for_each(
  37. headers.begin(), headers.end(),
  38. [&len](const pb::MessageLite *v) { len += DelimitedPBMessageSize(v); });
  39. if (payload) {
  40. len += payload->size();
  41. }
  42. int net_len = htonl(len);
  43. res->reserve(res->size() + sizeof(net_len) + len);
  44. pbio::StringOutputStream ss(res);
  45. pbio::CodedOutputStream os(&ss);
  46. os.WriteRaw(reinterpret_cast<const char *>(&net_len), sizeof(net_len));
  47. uint8_t *buf = os.GetDirectBufferForNBytesAndAdvance(len);
  48. assert(buf);
  49. std::for_each(
  50. headers.begin(), headers.end(), [&buf](const pb::MessageLite *v) {
  51. buf = pbio::CodedOutputStream::WriteVarint32ToArray(v->ByteSize(), buf);
  52. buf = v->SerializeWithCachedSizesToArray(buf);
  53. });
  54. if (payload) {
  55. buf = os.WriteStringToArray(*payload, buf);
  56. }
  57. }
  58. static void ConstructPayload(std::string *res, const pb::MessageLite *header) {
  59. int len = DelimitedPBMessageSize(header);
  60. res->reserve(len);
  61. pbio::StringOutputStream ss(res);
  62. pbio::CodedOutputStream os(&ss);
  63. uint8_t *buf = os.GetDirectBufferForNBytesAndAdvance(len);
  64. assert(buf);
  65. buf = pbio::CodedOutputStream::WriteVarint32ToArray(header->ByteSize(), buf);
  66. buf = header->SerializeWithCachedSizesToArray(buf);
  67. }
  68. static void ConstructPayload(std::string *res, const std::string *request) {
  69. int len =
  70. pbio::CodedOutputStream::VarintSize32(request->size()) + request->size();
  71. res->reserve(len);
  72. pbio::StringOutputStream ss(res);
  73. pbio::CodedOutputStream os(&ss);
  74. uint8_t *buf = os.GetDirectBufferForNBytesAndAdvance(len);
  75. assert(buf);
  76. buf = pbio::CodedOutputStream::WriteVarint32ToArray(request->size(), buf);
  77. buf = os.WriteStringToArray(*request, buf);
  78. }
  79. static void SetRequestHeader(LockFreeRpcEngine *engine, int call_id,
  80. const std::string &method_name, int retry_count,
  81. RpcRequestHeaderProto *rpc_header,
  82. RequestHeaderProto *req_header) {
  83. rpc_header->set_rpckind(RPC_PROTOCOL_BUFFER);
  84. rpc_header->set_rpcop(RpcRequestHeaderProto::RPC_FINAL_PACKET);
  85. rpc_header->set_callid(call_id);
  86. if (retry_count != kNoRetry)
  87. rpc_header->set_retrycount(retry_count);
  88. rpc_header->set_clientid(engine->client_name());
  89. req_header->set_methodname(method_name);
  90. req_header->set_declaringclassprotocolname(engine->protocol_name());
  91. req_header->set_clientprotocolversion(engine->protocol_version());
  92. }
  93. RpcConnection::~RpcConnection() {}
  94. Request::Request(LockFreeRpcEngine *engine, const std::string &method_name, int call_id,
  95. const std::string &request, Handler &&handler)
  96. : engine_(engine),
  97. method_name_(method_name),
  98. call_id_(call_id),
  99. timer_(engine->io_service()),
  100. handler_(std::move(handler)),
  101. retry_count_(engine->retry_policy() ? 0 : kNoRetry) {
  102. ConstructPayload(&payload_, &request);
  103. }
  104. Request::Request(LockFreeRpcEngine *engine, const std::string &method_name, int call_id,
  105. const pb::MessageLite *request, Handler &&handler)
  106. : engine_(engine),
  107. method_name_(method_name),
  108. call_id_(call_id),
  109. timer_(engine->io_service()),
  110. handler_(std::move(handler)),
  111. retry_count_(engine->retry_policy() ? 0 : kNoRetry) {
  112. ConstructPayload(&payload_, request);
  113. }
  114. Request::Request(LockFreeRpcEngine *engine, Handler &&handler)
  115. : engine_(engine),
  116. call_id_(-1),
  117. timer_(engine->io_service()),
  118. handler_(std::move(handler)),
  119. retry_count_(engine->retry_policy() ? 0 : kNoRetry) {
  120. }
  121. void Request::GetPacket(std::string *res) const {
  122. if (payload_.empty())
  123. return;
  124. RpcRequestHeaderProto rpc_header;
  125. RequestHeaderProto req_header;
  126. SetRequestHeader(engine_, call_id_, method_name_, retry_count_, &rpc_header,
  127. &req_header);
  128. // SASL messages don't have a request header
  129. if (method_name_ != SASL_METHOD_NAME)
  130. AddHeadersToPacket(res, {&rpc_header, &req_header}, &payload_);
  131. else
  132. AddHeadersToPacket(res, {&rpc_header}, &payload_);
  133. }
  134. void Request::OnResponseArrived(pbio::CodedInputStream *is,
  135. const Status &status) {
  136. handler_(is, status);
  137. }
  138. RpcConnection::RpcConnection(LockFreeRpcEngine *engine)
  139. : engine_(engine),
  140. connected_(kNotYetConnected) {}
  141. ::asio::io_service &RpcConnection::io_service() {
  142. return engine_->io_service();
  143. }
  144. void RpcConnection::StartReading() {
  145. auto shared_this = shared_from_this();
  146. io_service().post([shared_this, this] () {
  147. OnRecvCompleted(::asio::error_code(), 0);
  148. });
  149. }
  150. void RpcConnection::HandshakeComplete(const Status &s) {
  151. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  152. LOG_TRACE(kRPC, << "RpcConnectionImpl::HandshakeComplete called");
  153. if (s.ok()) {
  154. if (connected_ == kHandshaking) {
  155. auto shared_this = shared_from_this();
  156. connected_ = kAuthenticating;
  157. if (auth_info_.useSASL()) {
  158. #ifdef USE_SASL
  159. sasl_protocol_ = std::make_shared<SaslProtocol>(cluster_name_, auth_info_, shared_from_this());
  160. sasl_protocol_->SetEventHandlers(event_handlers_);
  161. sasl_protocol_->authenticate([shared_this, this](
  162. const Status & status, const AuthInfo & new_auth_info) {
  163. AuthComplete(status, new_auth_info); } );
  164. #else
  165. AuthComplete_locked(Status::Error("SASL is required, but no SASL library was found"), auth_info_);
  166. #endif
  167. } else {
  168. AuthComplete_locked(Status::OK(), auth_info_);
  169. }
  170. }
  171. } else {
  172. CommsError(s);
  173. };
  174. }
  175. void RpcConnection::AuthComplete(const Status &s, const AuthInfo & new_auth_info) {
  176. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  177. AuthComplete_locked(s, new_auth_info);
  178. }
  179. void RpcConnection::AuthComplete_locked(const Status &s, const AuthInfo & new_auth_info) {
  180. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  181. LOG_TRACE(kRPC, << "RpcConnectionImpl::AuthComplete called");
  182. // Free the sasl_protocol object
  183. sasl_protocol_.reset();
  184. if (s.ok()) {
  185. auth_info_ = new_auth_info;
  186. auto shared_this = shared_from_this();
  187. SendContext([shared_this, this](const Status & s) {
  188. ContextComplete(s);
  189. });
  190. } else {
  191. CommsError(s);
  192. };
  193. }
  194. void RpcConnection::ContextComplete(const Status &s) {
  195. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  196. LOG_TRACE(kRPC, << "RpcConnectionImpl::ContextComplete called");
  197. if (s.ok()) {
  198. if (connected_ == kAuthenticating) {
  199. connected_ = kConnected;
  200. }
  201. FlushPendingRequests();
  202. } else {
  203. CommsError(s);
  204. };
  205. }
  206. void RpcConnection::AsyncFlushPendingRequests() {
  207. std::shared_ptr<RpcConnection> shared_this = shared_from_this();
  208. io_service().post([shared_this, this]() {
  209. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  210. LOG_TRACE(kRPC, << "RpcConnection::AsyncFlushPendingRequests called (connected=" << ToString(connected_) << ")");
  211. if (!request_over_the_wire_) {
  212. FlushPendingRequests();
  213. }
  214. });
  215. }
  216. void RpcConnection::HandleRpcResponse(std::shared_ptr<Response> response) {
  217. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  218. response->ar.reset(new pbio::ArrayInputStream(&response->data_[0], response->data_.size()));
  219. response->in.reset(new pbio::CodedInputStream(response->ar.get()));
  220. response->in->PushLimit(response->data_.size());
  221. RpcResponseHeaderProto h;
  222. ReadDelimitedPBMessage(response->in.get(), &h);
  223. auto req = RemoveFromRunningQueue(h.callid());
  224. if (!req) {
  225. LOG_WARN(kRPC, << "RPC response with Unknown call id " << h.callid());
  226. return;
  227. }
  228. Status status;
  229. if(event_handlers_) {
  230. event_response event_resp = event_handlers_->call(FS_NN_READ_EVENT, cluster_name_.c_str(), 0);
  231. #ifndef LIBHDFSPP_SIMULATE_ERROR_DISABLED
  232. if (event_resp.response() == event_response::kTest_Error) {
  233. status = event_resp.status();
  234. }
  235. #endif
  236. }
  237. if (status.ok() && h.has_exceptionclassname()) {
  238. status =
  239. Status::Exception(h.exceptionclassname().c_str(), h.errormsg().c_str());
  240. }
  241. io_service().post([req, response, status]() {
  242. req->OnResponseArrived(response->in.get(), status); // Never call back while holding a lock
  243. });
  244. }
  245. void RpcConnection::HandleRpcTimeout(std::shared_ptr<Request> req,
  246. const ::asio::error_code &ec) {
  247. if (ec.value() == asio::error::operation_aborted) {
  248. return;
  249. }
  250. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  251. auto r = RemoveFromRunningQueue(req->call_id());
  252. if (!r) {
  253. // The RPC might have been finished and removed from the queue
  254. return;
  255. }
  256. Status stat = ToStatus(ec ? ec : make_error_code(::asio::error::timed_out));
  257. r->OnResponseArrived(nullptr, stat);
  258. }
  259. std::shared_ptr<std::string> RpcConnection::PrepareHandshakePacket() {
  260. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  261. /** From Client.java:
  262. *
  263. * Write the connection header - this is sent when connection is established
  264. * +----------------------------------+
  265. * | "hrpc" 4 bytes |
  266. * +----------------------------------+
  267. * | Version (1 byte) |
  268. * +----------------------------------+
  269. * | Service Class (1 byte) |
  270. * +----------------------------------+
  271. * | AuthProtocol (1 byte) |
  272. * +----------------------------------+
  273. *
  274. * AuthProtocol: 0->none, -33->SASL
  275. */
  276. char auth_protocol = auth_info_.useSASL() ? -33 : 0;
  277. const char handshake_header[] = {'h', 'r', 'p', 'c',
  278. RpcEngine::kRpcVersion, 0, auth_protocol};
  279. auto res =
  280. std::make_shared<std::string>(handshake_header, sizeof(handshake_header));
  281. return res;
  282. }
  283. std::shared_ptr<std::string> RpcConnection::PrepareContextPacket() {
  284. // This needs to be send after the SASL handshake, and
  285. // after the SASL handshake (if any)
  286. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  287. auto res = std::make_shared<std::string>();
  288. RpcRequestHeaderProto h;
  289. h.set_rpckind(RPC_PROTOCOL_BUFFER);
  290. h.set_rpcop(RpcRequestHeaderProto::RPC_FINAL_PACKET);
  291. h.set_callid(RpcEngine::kCallIdConnectionContext);
  292. h.set_clientid(engine_->client_name());
  293. IpcConnectionContextProto handshake;
  294. handshake.set_protocol(engine_->protocol_name());
  295. const std::string & user_name = auth_info_.getUser();
  296. if (!user_name.empty()) {
  297. *handshake.mutable_userinfo()->mutable_effectiveuser() = user_name;
  298. }
  299. AddHeadersToPacket(res.get(), {&h, &handshake}, nullptr);
  300. return res;
  301. }
  302. void RpcConnection::AsyncRpc(
  303. const std::string &method_name, const ::google::protobuf::MessageLite *req,
  304. std::shared_ptr<::google::protobuf::MessageLite> resp,
  305. const RpcCallback &handler) {
  306. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  307. AsyncRpc_locked(method_name, req, resp, handler);
  308. }
  309. void RpcConnection::AsyncRpc_locked(
  310. const std::string &method_name, const ::google::protobuf::MessageLite *req,
  311. std::shared_ptr<::google::protobuf::MessageLite> resp,
  312. const RpcCallback &handler) {
  313. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  314. auto wrapped_handler =
  315. [resp, handler](pbio::CodedInputStream *is, const Status &status) {
  316. if (status.ok()) {
  317. if (is) { // Connect messages will not have an is
  318. ReadDelimitedPBMessage(is, resp.get());
  319. }
  320. }
  321. handler(status);
  322. };
  323. int call_id = (method_name != SASL_METHOD_NAME ? engine_->NextCallId() : RpcEngine::kCallIdSasl);
  324. auto r = std::make_shared<Request>(engine_, method_name, call_id, req,
  325. std::move(wrapped_handler));
  326. auto r_vector = std::vector<std::shared_ptr<Request> > (1, r);
  327. SendRpcRequests(r_vector);
  328. }
  329. void RpcConnection::AsyncRpc(const std::vector<std::shared_ptr<Request> > & requests) {
  330. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  331. SendRpcRequests(requests);
  332. }
  333. void RpcConnection::SendRpcRequests(const std::vector<std::shared_ptr<Request> > & requests) {
  334. LOG_TRACE(kRPC, << "RpcConnection::SendRpcRequests[] called; connected=" << ToString(connected_));
  335. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  336. if (connected_ == kDisconnected) {
  337. // Oops. The connection failed _just_ before the engine got a chance
  338. // to send it. Register it as a failure
  339. Status status = Status::ResourceUnavailable("RpcConnection closed before send.");
  340. engine_->AsyncRpcCommsError(status, shared_from_this(), requests);
  341. } else {
  342. pending_requests_.reserve(pending_requests_.size() + requests.size());
  343. for (auto r: requests) {
  344. if (r->method_name() != SASL_METHOD_NAME)
  345. pending_requests_.push_back(r);
  346. else
  347. auth_requests_.push_back(r);
  348. }
  349. if (connected_ == kConnected || connected_ == kHandshaking || connected_ == kAuthenticating) { // Dont flush if we're waiting or handshaking
  350. FlushPendingRequests();
  351. }
  352. }
  353. }
  354. void RpcConnection::PreEnqueueRequests(
  355. std::vector<std::shared_ptr<Request>> requests) {
  356. // Public method - acquire lock
  357. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  358. LOG_DEBUG(kRPC, << "RpcConnection::PreEnqueueRequests called");
  359. assert(connected_ == kNotYetConnected);
  360. pending_requests_.insert(pending_requests_.end(), requests.begin(),
  361. requests.end());
  362. // Don't start sending yet; will flush when connected
  363. }
  364. void RpcConnection::SetEventHandlers(std::shared_ptr<LibhdfsEvents> event_handlers) {
  365. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  366. event_handlers_ = event_handlers;
  367. if (sasl_protocol_) {
  368. sasl_protocol_->SetEventHandlers(event_handlers);
  369. }
  370. }
  371. void RpcConnection::SetClusterName(std::string cluster_name) {
  372. std::lock_guard<std::mutex> state_lock(connection_state_lock_);
  373. cluster_name_ = cluster_name;
  374. }
  375. void RpcConnection::CommsError(const Status &status) {
  376. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  377. Disconnect();
  378. // Anything that has been queued to the connection (on the fly or pending)
  379. // will get dinged for a retry
  380. std::vector<std::shared_ptr<Request>> requestsToReturn;
  381. std::transform(requests_on_fly_.begin(), requests_on_fly_.end(),
  382. std::back_inserter(requestsToReturn),
  383. std::bind(&RequestOnFlyMap::value_type::second, _1));
  384. requests_on_fly_.clear();
  385. requestsToReturn.insert(requestsToReturn.end(),
  386. std::make_move_iterator(pending_requests_.begin()),
  387. std::make_move_iterator(pending_requests_.end()));
  388. pending_requests_.clear();
  389. engine_->AsyncRpcCommsError(status, shared_from_this(), requestsToReturn);
  390. }
  391. void RpcConnection::ClearAndDisconnect(const ::asio::error_code &ec) {
  392. Disconnect();
  393. std::vector<std::shared_ptr<Request>> requests;
  394. std::transform(requests_on_fly_.begin(), requests_on_fly_.end(),
  395. std::back_inserter(requests),
  396. std::bind(&RequestOnFlyMap::value_type::second, _1));
  397. requests_on_fly_.clear();
  398. requests.insert(requests.end(),
  399. std::make_move_iterator(pending_requests_.begin()),
  400. std::make_move_iterator(pending_requests_.end()));
  401. pending_requests_.clear();
  402. for (const auto &req : requests) {
  403. req->OnResponseArrived(nullptr, ToStatus(ec));
  404. }
  405. }
  406. std::shared_ptr<Request> RpcConnection::RemoveFromRunningQueue(int call_id) {
  407. assert(lock_held(connection_state_lock_)); // Must be holding lock before calling
  408. auto it = requests_on_fly_.find(call_id);
  409. if (it == requests_on_fly_.end()) {
  410. return std::shared_ptr<Request>();
  411. }
  412. auto req = it->second;
  413. requests_on_fly_.erase(it);
  414. return req;
  415. }
  416. std::string RpcConnection::ToString(ConnectedState connected) {
  417. switch(connected) {
  418. case kNotYetConnected: return "NotYetConnected";
  419. case kConnecting: return "Connecting";
  420. case kHandshaking: return "Handshaking";
  421. case kAuthenticating: return "Authenticating";
  422. case kConnected: return "Connected";
  423. case kDisconnected: return "Disconnected";
  424. default: return "Invalid ConnectedState";
  425. }
  426. }
  427. }