bad_datanode_test.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "fs/filesystem.h"
  19. #include "fs/bad_datanode_tracker.h"
  20. #include "common/libhdfs_events_impl.h"
  21. #include "common/util.h"
  22. #include <gmock/gmock.h>
  23. using hadoop::common::TokenProto;
  24. using hadoop::hdfs::DatanodeInfoProto;
  25. using hadoop::hdfs::DatanodeIDProto;
  26. using hadoop::hdfs::ExtendedBlockProto;
  27. using hadoop::hdfs::LocatedBlockProto;
  28. using hadoop::hdfs::LocatedBlocksProto;
  29. using ::testing::_;
  30. using ::testing::InvokeArgument;
  31. using ::testing::Return;
  32. using namespace hdfs;
  33. class MockReader : public BlockReader {
  34. public:
  35. MOCK_METHOD2(
  36. AsyncReadPacket,
  37. void(const asio::mutable_buffers_1 &,
  38. const std::function<void(const Status &, size_t transferred)> &));
  39. MOCK_METHOD5(AsyncRequestBlock,
  40. void(const std::string &client_name,
  41. const hadoop::hdfs::ExtendedBlockProto *block,
  42. uint64_t length, uint64_t offset,
  43. const std::function<void(Status)> &handler));
  44. MOCK_METHOD5(AsyncReadBlock, void(
  45. const std::string & client_name,
  46. const hadoop::hdfs::LocatedBlockProto &block,
  47. size_t offset,
  48. const MutableBuffers &buffers,
  49. const std::function<void(const Status &, size_t)> handler));
  50. virtual void CancelOperation() override {
  51. /* no-op, declared pure virtual */
  52. }
  53. };
  54. class MockDNConnection : public DataNodeConnection, public std::enable_shared_from_this<MockDNConnection> {
  55. void Connect(std::function<void(Status status, std::shared_ptr<DataNodeConnection> dn)> handler) override {
  56. handler(Status::OK(), shared_from_this());
  57. }
  58. void async_read_some(const MutableBuffers &buf,
  59. std::function<void (const asio::error_code & error,
  60. std::size_t bytes_transferred) > handler) override {
  61. (void)buf;
  62. handler(asio::error::fault, 0);
  63. }
  64. void async_write_some(const ConstBuffers &buf,
  65. std::function<void (const asio::error_code & error,
  66. std::size_t bytes_transferred) > handler) override {
  67. (void)buf;
  68. handler(asio::error::fault, 0);
  69. }
  70. virtual void Cancel() override {
  71. /* no-op, declared pure virtual */
  72. }
  73. };
  74. class PartialMockFileHandle : public FileHandleImpl {
  75. using FileHandleImpl::FileHandleImpl;
  76. public:
  77. std::shared_ptr<MockReader> mock_reader_ = std::make_shared<MockReader>();
  78. protected:
  79. std::shared_ptr<BlockReader> CreateBlockReader(const BlockReaderOptions &options,
  80. std::shared_ptr<DataNodeConnection> dn,
  81. std::shared_ptr<hdfs::LibhdfsEvents> event_handlers) override
  82. {
  83. (void) options; (void) dn; (void) event_handlers;
  84. assert(mock_reader_);
  85. return mock_reader_;
  86. }
  87. std::shared_ptr<DataNodeConnection> CreateDataNodeConnection(
  88. ::asio::io_service *io_service,
  89. const ::hadoop::hdfs::DatanodeInfoProto & dn,
  90. const hadoop::common::TokenProto * token) override {
  91. (void) io_service; (void) dn; (void) token;
  92. return std::make_shared<MockDNConnection>();
  93. }
  94. };
  95. TEST(BadDataNodeTest, TestNoNodes) {
  96. auto file_info = std::make_shared<struct FileInfo>();
  97. file_info->blocks_.push_back(LocatedBlockProto());
  98. LocatedBlockProto & block = file_info->blocks_[0];
  99. ExtendedBlockProto *b = block.mutable_b();
  100. b->set_poolid("");
  101. b->set_blockid(1);
  102. b->set_generationstamp(1);
  103. b->set_numbytes(4096);
  104. // Set up the one block to have one datanode holding it
  105. DatanodeInfoProto *di = block.add_locs();
  106. DatanodeIDProto *dnid = di->mutable_id();
  107. dnid->set_datanodeuuid("foo");
  108. char buf[4096] = {
  109. 0,
  110. };
  111. IoServiceImpl io_service;
  112. auto bad_node_tracker = std::make_shared<BadDataNodeTracker>();
  113. auto monitors = std::make_shared<LibhdfsEvents>();
  114. bad_node_tracker->AddBadNode("foo");
  115. PartialMockFileHandle is("cluster", "file", &io_service.io_service(), GetRandomClientName(), file_info, bad_node_tracker, monitors);
  116. Status stat;
  117. size_t read = 0;
  118. // Exclude the one datanode with the data
  119. is.AsyncPreadSome(0, asio::buffer(buf, sizeof(buf)), nullptr,
  120. [&stat, &read](const Status &status, const std::string &, size_t transferred) {
  121. stat = status;
  122. read = transferred;
  123. });
  124. // Should fail with no resource available
  125. ASSERT_EQ(static_cast<int>(std::errc::resource_unavailable_try_again), stat.code());
  126. ASSERT_EQ(0UL, read);
  127. }
  128. TEST(BadDataNodeTest, NNEventCallback) {
  129. auto file_info = std::make_shared<struct FileInfo>();
  130. file_info->blocks_.push_back(LocatedBlockProto());
  131. LocatedBlockProto & block = file_info->blocks_[0];
  132. ExtendedBlockProto *b = block.mutable_b();
  133. b->set_poolid("");
  134. b->set_blockid(1);
  135. b->set_generationstamp(1);
  136. b->set_numbytes(4096);
  137. // Set up the one block to have one datanodes holding it
  138. DatanodeInfoProto *di = block.add_locs();
  139. DatanodeIDProto *dnid = di->mutable_id();
  140. dnid->set_datanodeuuid("dn1");
  141. char buf[4096] = {
  142. 0,
  143. };
  144. IoServiceImpl io_service;
  145. auto tracker = std::make_shared<BadDataNodeTracker>();
  146. // Set up event callbacks
  147. int calls = 0;
  148. std::vector<std::string> callbacks;
  149. auto monitors = std::make_shared<LibhdfsEvents>();
  150. monitors->set_file_callback([&calls, &callbacks] (const char * event,
  151. const char * cluster,
  152. const char * file,
  153. int64_t value) {
  154. (void)cluster; (void) file; (void)value;
  155. callbacks.push_back(event);
  156. // Allow connect call to succeed by fail on read
  157. if (calls++ == 1)
  158. return event_response::test_err(Status::Error("Test"));
  159. return event_response::ok();
  160. });
  161. PartialMockFileHandle is("cluster", "file", &io_service.io_service(), GetRandomClientName(), file_info, tracker, monitors);
  162. Status stat;
  163. size_t read = 0;
  164. EXPECT_CALL(*is.mock_reader_, AsyncReadBlock(_,_,_,_,_))
  165. // Will return OK, but our callback will subvert it
  166. .WillOnce(InvokeArgument<4>(
  167. Status::OK(), 0));
  168. is.AsyncPreadSome(
  169. 0, asio::buffer(buf, sizeof(buf)), nullptr,
  170. [&stat, &read](const Status &status, const std::string &,
  171. size_t transferred) {
  172. stat = status;
  173. read = transferred;
  174. });
  175. ASSERT_FALSE(stat.ok());
  176. ASSERT_EQ(2, callbacks.size());
  177. ASSERT_EQ(FILE_DN_CONNECT_EVENT, callbacks[0]);
  178. ASSERT_EQ(FILE_DN_READ_EVENT, callbacks[1]);
  179. }
  180. TEST(BadDataNodeTest, RecoverableError) {
  181. auto file_info = std::make_shared<struct FileInfo>();
  182. file_info->blocks_.push_back(LocatedBlockProto());
  183. LocatedBlockProto & block = file_info->blocks_[0];
  184. ExtendedBlockProto *b = block.mutable_b();
  185. b->set_poolid("");
  186. b->set_blockid(1);
  187. b->set_generationstamp(1);
  188. b->set_numbytes(4096);
  189. // Set up the one block to have one datanode holding it
  190. DatanodeInfoProto *di = block.add_locs();
  191. DatanodeIDProto *dnid = di->mutable_id();
  192. dnid->set_datanodeuuid("foo");
  193. char buf[4096] = {
  194. 0,
  195. };
  196. IoServiceImpl io_service;
  197. auto tracker = std::make_shared<BadDataNodeTracker>();
  198. auto monitors = std::make_shared<LibhdfsEvents>();
  199. PartialMockFileHandle is("cluster", "file", &io_service.io_service(), GetRandomClientName(), file_info, tracker, monitors);
  200. Status stat;
  201. size_t read = 0;
  202. EXPECT_CALL(*is.mock_reader_, AsyncReadBlock(_,_,_,_,_))
  203. // resource unavailable error
  204. .WillOnce(InvokeArgument<4>(
  205. Status::ResourceUnavailable("Unable to get some resource, try again later"), 0));
  206. is.AsyncPreadSome(
  207. 0, asio::buffer(buf, sizeof(buf)), nullptr,
  208. [&stat, &read](const Status &status, const std::string &,
  209. size_t transferred) {
  210. stat = status;
  211. read = transferred;
  212. });
  213. ASSERT_FALSE(stat.ok());
  214. std::string failing_dn = "id_of_bad_datanode";
  215. if (!stat.ok()) {
  216. if (FileHandle::ShouldExclude(stat)) {
  217. tracker->AddBadNode(failing_dn);
  218. }
  219. }
  220. ASSERT_FALSE(tracker->IsBadNode(failing_dn));
  221. }
  222. TEST(BadDataNodeTest, InternalError) {
  223. auto file_info = std::make_shared<struct FileInfo>();
  224. file_info->blocks_.push_back(LocatedBlockProto());
  225. LocatedBlockProto & block = file_info->blocks_[0];
  226. ExtendedBlockProto *b = block.mutable_b();
  227. b->set_poolid("");
  228. b->set_blockid(1);
  229. b->set_generationstamp(1);
  230. b->set_numbytes(4096);
  231. // Set up the one block to have one datanode holding it
  232. DatanodeInfoProto *di = block.add_locs();
  233. DatanodeIDProto *dnid = di->mutable_id();
  234. dnid->set_datanodeuuid("foo");
  235. char buf[4096] = {
  236. 0,
  237. };
  238. IoServiceImpl io_service;
  239. auto tracker = std::make_shared<BadDataNodeTracker>();
  240. auto monitors = std::make_shared<LibhdfsEvents>();
  241. PartialMockFileHandle is("cluster", "file", &io_service.io_service(), GetRandomClientName(), file_info, tracker, monitors);
  242. Status stat;
  243. size_t read = 0;
  244. EXPECT_CALL(*is.mock_reader_, AsyncReadBlock(_,_,_,_,_))
  245. // resource unavailable error
  246. .WillOnce(InvokeArgument<4>(
  247. Status::Exception("server_explosion_exception",
  248. "the server exploded"),
  249. sizeof(buf)));
  250. is.AsyncPreadSome(
  251. 0, asio::buffer(buf, sizeof(buf)), nullptr,
  252. [&stat, &read](const Status &status, const std::string &,
  253. size_t transferred) {
  254. stat = status;
  255. read = transferred;
  256. });
  257. ASSERT_FALSE(stat.ok());
  258. std::string failing_dn = "id_of_bad_datanode";
  259. if (!stat.ok()) {
  260. if (FileHandle::ShouldExclude(stat)) {
  261. tracker->AddBadNode(failing_dn);
  262. }
  263. }
  264. ASSERT_TRUE(tracker->IsBadNode(failing_dn));
  265. }
  266. int main(int argc, char *argv[]) {
  267. // The following line must be executed to initialize Google Mock
  268. // (and Google Test) before running the tests.
  269. ::testing::InitGoogleMock(&argc, argv);
  270. int exit_code = RUN_ALL_TESTS();
  271. // Clean up static data and prevent valgrind memory leaks
  272. google::protobuf::ShutdownProtobufLibrary();
  273. return exit_code;
  274. }