// filesystem.cc
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include "filesystem.h"
  19. #include "common/namenode_info.h"
  20. #include <functional>
  21. #include <limits>
  22. #include <future>
  23. #include <tuple>
  24. #include <iostream>
  25. #include <pwd.h>
  26. #include <fnmatch.h>
  27. #define FMT_THIS_ADDR "this=" << (void*)this
  28. namespace hdfs {
  29. static const char kNamenodeProtocol[] = "org.apache.hadoop.hdfs.protocol.ClientProtocol";
  30. static const int kNamenodeProtocolVersion = 1;
  31. using ::asio::ip::tcp;
  32. static constexpr uint16_t kDefaultPort = 8020;
  33. // forward declarations
  34. const std::string get_effective_user_name(const std::string &);
  35. uint32_t FileSystem::GetDefaultFindMaxDepth() {
  36. return std::numeric_limits<uint32_t>::max();
  37. }
  38. uint16_t FileSystem::GetDefaultPermissionMask() {
  39. return 0755;
  40. }
  41. Status FileSystem::CheckValidPermissionMask(uint16_t permissions) {
  42. if (permissions > 01777) {
  43. std::stringstream errormsg;
  44. errormsg << "CheckValidPermissionMask: argument 'permissions' is " << std::oct
  45. << std::showbase << permissions << " (should be between 0 and 01777)";
  46. return Status::InvalidArgument(errormsg.str().c_str());
  47. }
  48. return Status::OK();
  49. }
  50. Status FileSystem::CheckValidReplication(uint16_t replication) {
  51. if (replication < 1 || replication > 512) {
  52. std::stringstream errormsg;
  53. errormsg << "CheckValidReplication: argument 'replication' is "
  54. << replication << " (should be between 1 and 512)";
  55. return Status::InvalidArgument(errormsg.str().c_str());
  56. }
  57. return Status::OK();
  58. }
  59. /*****************************************************************************
  60. * FILESYSTEM BASE CLASS
  61. ****************************************************************************/
  62. FileSystem *FileSystem::New(
  63. IoService *&io_service, const std::string &user_name, const Options &options) {
  64. return new FileSystemImpl(io_service, user_name, options);
  65. }
  66. FileSystem *FileSystem::New(
  67. std::shared_ptr<IoService> io_service, const std::string &user_name, const Options &options) {
  68. return new FileSystemImpl(io_service, user_name, options);
  69. }
  70. FileSystem *FileSystem::New() {
  71. // No, this pointer won't be leaked. The FileSystem takes ownership.
  72. std::shared_ptr<IoService> io_service = IoService::MakeShared();
  73. if(!io_service)
  74. return nullptr;
  75. int thread_count = io_service->InitDefaultWorkers();
  76. if(thread_count < 1)
  77. return nullptr;
  78. std::string user_name = get_effective_user_name("");
  79. Options options;
  80. return new FileSystemImpl(io_service, user_name, options);
  81. }
  82. /*****************************************************************************
  83. * FILESYSTEM IMPLEMENTATION
  84. ****************************************************************************/
  /**
   * Picks the user name to present to the cluster.
   *
   * Resolution order:
   *   1. the supplied user_name, when non-empty
   *   2. the HADOOP_USER_NAME environment variable
   *   3. the USER environment variable
   *   4. on POSIX builds, the passwd entry of the effective uid
   *   5. the literal "unknown_user"
   *
   * NOTE(review): getpwuid() returns a pointer into static storage, so the
   * POSIX fallback is not reentrant.
   */
  const std::string get_effective_user_name(const std::string &user_name) {
    // An explicitly supplied name always wins.
    if (!user_name.empty()) {
      return user_name;
    }

    // Otherwise consult the environment, in priority order.
    const char *const env_candidates[] = {"HADOOP_USER_NAME", "USER"};
    for (const char *variable : env_candidates) {
      const char *value = getenv(variable);
      if (value != nullptr) {
        return value;
      }
    }

    // If running on POSIX, fall back to the account behind the effective uid.
  #if defined(_POSIX_VERSION)
    const struct passwd *entry = getpwuid(geteuid());
    if (entry != nullptr && entry->pw_name != nullptr) {
      return entry->pw_name;
    }
  #endif

    return "unknown_user";
  }
  109. FileSystemImpl::FileSystemImpl(IoService *&io_service, const std::string &user_name, const Options &options) :
  110. io_service_(static_cast<IoServiceImpl *>(io_service)), options_(options),
  111. client_name_(GetRandomClientName()),
  112. nn_(
  113. &io_service_->io_service(), options, client_name_,
  114. get_effective_user_name(user_name), kNamenodeProtocol,
  115. kNamenodeProtocolVersion
  116. ),
  117. bad_node_tracker_(std::make_shared<BadDataNodeTracker>()),
  118. event_handlers_(std::make_shared<LibhdfsEvents>())
  119. {
  120. LOG_DEBUG(kFileSystem, << "FileSystemImpl::FileSystemImpl("
  121. << FMT_THIS_ADDR << ") called");
  122. // Poor man's move
  123. io_service = nullptr;
  124. unsigned int running_workers = 0;
  125. if(options.io_threads_ < 1) {
  126. LOG_DEBUG(kFileSystem, << "FileSystemImpl::FileSystemImpl Initializing default number of worker threads");
  127. running_workers = io_service_->InitDefaultWorkers();
  128. } else {
  129. LOG_DEBUG(kFileSystem, << "FileSystemImpl::FileSystenImpl Initializing " << options_.io_threads_ << " worker threads.");
  130. running_workers = io_service->InitWorkers(options_.io_threads_);
  131. }
  132. if(running_workers < 1) {
  133. LOG_WARN(kFileSystem, << "FileSystemImpl::FileSystemImpl was unable to start worker threads");
  134. }
  135. }
  136. FileSystemImpl::FileSystemImpl(std::shared_ptr<IoService> io_service, const std::string& user_name, const Options &options) :
  137. io_service_(std::static_pointer_cast<IoServiceImpl>(io_service)), options_(options),
  138. client_name_(GetRandomClientName()),
  139. nn_(
  140. &io_service_->io_service(), options, client_name_,
  141. get_effective_user_name(user_name), kNamenodeProtocol,
  142. kNamenodeProtocolVersion
  143. ),
  144. bad_node_tracker_(std::make_shared<BadDataNodeTracker>()),
  145. event_handlers_(std::make_shared<LibhdfsEvents>())
  146. {
  147. LOG_DEBUG(kFileSystem, << "FileSystemImpl::FileSystemImpl("
  148. << FMT_THIS_ADDR << ", shared IoService@" << io_service_.get() << ") called");
  149. int worker_thread_count = io_service_->get_worker_thread_count();
  150. if(worker_thread_count < 1) {
  151. LOG_WARN(kFileSystem, << "FileSystemImpl::FileSystemImpl IoService provided doesn't have any worker threads. "
  152. << "It needs at least 1 worker to connect to an HDFS cluster.")
  153. } else {
  154. LOG_DEBUG(kFileSystem, << "FileSystemImpl::FileSystemImpl using " << worker_thread_count << " worker threads.");
  155. }
  156. }
  157. FileSystemImpl::~FileSystemImpl() {
  158. LOG_TRACE(kFileSystem, << "FileSystemImpl::~FileSystemImpl("
  159. << FMT_THIS_ADDR << ") called");
  160. /**
  161. * Note: IoService must be stopped before getting rid of worker threads.
  162. * Once worker threads are joined and deleted the service can be deleted.
  163. **/
  164. io_service_->Stop();
  165. }
  166. void FileSystemImpl::Connect(const std::string &server,
  167. const std::string &service,
  168. const std::function<void(const Status &, FileSystem * fs)> &handler) {
  169. LOG_INFO(kFileSystem, << "FileSystemImpl::Connect(" << FMT_THIS_ADDR
  170. << ", server=" << server << ", service="
  171. << service << ") called");
  172. /* IoService::New can return nullptr */
  173. if (!io_service_) {
  174. handler (Status::Error("Null IoService"), this);
  175. }
  176. // DNS lookup here for namenode(s)
  177. std::vector<ResolvedNamenodeInfo> resolved_namenodes;
  178. auto name_service = options_.services.find(server);
  179. if(name_service != options_.services.end()) {
  180. cluster_name_ = name_service->first;
  181. resolved_namenodes = BulkResolve(&io_service_->io_service(), name_service->second);
  182. } else {
  183. cluster_name_ = server + ":" + service;
  184. // tmp namenode info just to get this in the right format for BulkResolve
  185. NamenodeInfo tmp_info;
  186. optional<URI> uri = URI::parse_from_string("hdfs://" + cluster_name_);
  187. if(!uri) {
  188. LOG_ERROR(kFileSystem, << "Unable to use URI for cluster " << cluster_name_);
  189. handler(Status::Error(("Invalid namenode " + cluster_name_ + " in config").c_str()), this);
  190. }
  191. tmp_info.uri = uri.value();
  192. resolved_namenodes = BulkResolve(&io_service_->io_service(), {tmp_info});
  193. }
  194. for(unsigned int i=0;i<resolved_namenodes.size();i++) {
  195. LOG_DEBUG(kFileSystem, << "Resolved Namenode");
  196. LOG_DEBUG(kFileSystem, << resolved_namenodes[i].str());
  197. }
  198. nn_.Connect(cluster_name_, /*server, service*/ resolved_namenodes, [this, handler](const Status & s) {
  199. handler(s, this);
  200. });
  201. }
  202. void FileSystemImpl::ConnectToDefaultFs(const std::function<void(const Status &, FileSystem *)> &handler) {
  203. std::string scheme = options_.defaultFS.get_scheme();
  204. if (strcasecmp(scheme.c_str(), "hdfs") != 0) {
  205. std::string error_message;
  206. error_message += "defaultFS of [" + options_.defaultFS.str() + "] is not supported";
  207. handler(Status::InvalidArgument(error_message.c_str()), nullptr);
  208. return;
  209. }
  210. std::string host = options_.defaultFS.get_host();
  211. if (host.empty()) {
  212. handler(Status::InvalidArgument("defaultFS must specify a hostname"), nullptr);
  213. return;
  214. }
  215. optional<uint16_t> port = options_.defaultFS.get_port();
  216. if (!port) {
  217. port = kDefaultPort;
  218. }
  219. std::string port_as_string = std::to_string(*port);
  220. Connect(host, port_as_string, handler);
  221. }
  222. int FileSystemImpl::AddWorkerThread() {
  223. LOG_DEBUG(kFileSystem, << "FileSystemImpl::AddWorkerThread("
  224. << FMT_THIS_ADDR << ") called."
  225. << " Existing thread count = " << WorkerThreadCount());
  226. if(!io_service_)
  227. return -1;
  228. io_service_->AddWorkerThread();
  229. return 1;
  230. }
  231. int FileSystemImpl::WorkerThreadCount() {
  232. if(!io_service_) {
  233. return -1;
  234. } else {
  235. return io_service_->get_worker_thread_count();
  236. }
  237. }
  238. void FileSystemImpl::Open(
  239. const std::string &path,
  240. const std::function<void(const Status &, FileHandle *)> &handler) {
  241. LOG_DEBUG(kFileSystem, << "FileSystemImpl::Open("
  242. << FMT_THIS_ADDR << ", path="
  243. << path << ") called");
  244. nn_.GetBlockLocations(path, 0, std::numeric_limits<int64_t>::max(), [this, path, handler](const Status &stat, std::shared_ptr<const struct FileInfo> file_info) {
  245. if(!stat.ok()) {
  246. LOG_DEBUG(kFileSystem, << "FileSystemImpl::Open failed to get block locations. status=" << stat.ToString());
  247. if(stat.get_server_exception_type() == Status::kStandbyException) {
  248. LOG_DEBUG(kFileSystem, << "Operation not allowed on standby datanode");
  249. }
  250. }
  251. handler(stat, stat.ok() ? new FileHandleImpl(cluster_name_, path, &io_service_->io_service(), client_name_, file_info, bad_node_tracker_, event_handlers_)
  252. : nullptr);
  253. });
  254. }
  255. BlockLocation LocatedBlockToBlockLocation(const hadoop::hdfs::LocatedBlockProto & locatedBlock)
  256. {
  257. BlockLocation result;
  258. result.setCorrupt(locatedBlock.corrupt());
  259. result.setOffset(locatedBlock.offset());
  260. std::vector<DNInfo> dn_info;
  261. dn_info.reserve(locatedBlock.locs_size());
  262. for (const hadoop::hdfs::DatanodeInfoProto & datanode_info: locatedBlock.locs()) {
  263. const hadoop::hdfs::DatanodeIDProto &id = datanode_info.id();
  264. DNInfo newInfo;
  265. if (id.has_ipaddr())
  266. newInfo.setIPAddr(id.ipaddr());
  267. if (id.has_hostname())
  268. newInfo.setHostname(id.hostname());
  269. if (id.has_xferport())
  270. newInfo.setXferPort(id.xferport());
  271. if (id.has_infoport())
  272. newInfo.setInfoPort(id.infoport());
  273. if (id.has_ipcport())
  274. newInfo.setIPCPort(id.ipcport());
  275. if (id.has_infosecureport())
  276. newInfo.setInfoSecurePort(id.infosecureport());
  277. if (datanode_info.has_location())
  278. newInfo.setNetworkLocation(datanode_info.location());
  279. dn_info.push_back(newInfo);
  280. }
  281. result.setDataNodes(dn_info);
  282. if (locatedBlock.has_b()) {
  283. const hadoop::hdfs::ExtendedBlockProto & b=locatedBlock.b();
  284. result.setLength(b.numbytes());
  285. }
  286. return result;
  287. }
  288. void FileSystemImpl::GetBlockLocations(const std::string & path, uint64_t offset, uint64_t length,
  289. const std::function<void(const Status &, std::shared_ptr<FileBlockLocation> locations)> handler)
  290. {
  291. LOG_DEBUG(kFileSystem, << "FileSystemImpl::GetBlockLocations("
  292. << FMT_THIS_ADDR << ", path="
  293. << path << ") called");
  294. //Protobuf gives an error 'Negative value is not supported'
  295. //if the high bit is set in uint64 in GetBlockLocations
  296. if (IsHighBitSet(offset)) {
  297. handler(Status::InvalidArgument("GetBlockLocations: argument 'offset' cannot have high bit set"), nullptr);
  298. return;
  299. }
  300. if (IsHighBitSet(length)) {
  301. handler(Status::InvalidArgument("GetBlockLocations: argument 'length' cannot have high bit set"), nullptr);
  302. return;
  303. }
  304. auto conversion = [handler](const Status & status, std::shared_ptr<const struct FileInfo> fileInfo) {
  305. if (status.ok()) {
  306. auto result = std::make_shared<FileBlockLocation>();
  307. result->setFileLength(fileInfo->file_length_);
  308. result->setLastBlockComplete(fileInfo->last_block_complete_);
  309. result->setUnderConstruction(fileInfo->under_construction_);
  310. std::vector<BlockLocation> blocks;
  311. for (const hadoop::hdfs::LocatedBlockProto & locatedBlock: fileInfo->blocks_) {
  312. auto newLocation = LocatedBlockToBlockLocation(locatedBlock);
  313. blocks.push_back(newLocation);
  314. }
  315. result->setBlockLocations(blocks);
  316. handler(status, result);
  317. } else {
  318. handler(status, std::shared_ptr<FileBlockLocation>());
  319. }
  320. };
  321. nn_.GetBlockLocations(path, offset, length, conversion);
  322. }
  323. void FileSystemImpl::GetPreferredBlockSize(const std::string &path,
  324. const std::function<void(const Status &, const uint64_t &)> &handler) {
  325. LOG_DEBUG(kFileSystem, << "FileSystemImpl::GetPreferredBlockSize("
  326. << FMT_THIS_ADDR << ", path="
  327. << path << ") called");
  328. nn_.GetPreferredBlockSize(path, handler);
  329. }
  330. void FileSystemImpl::SetReplication(const std::string & path, int16_t replication, std::function<void(const Status &)> handler) {
  331. LOG_DEBUG(kFileSystem,
  332. << "FileSystemImpl::SetReplication(" << FMT_THIS_ADDR << ", path=" << path <<
  333. ", replication=" << replication << ") called");
  334. if (path.empty()) {
  335. handler(Status::InvalidArgument("SetReplication: argument 'path' cannot be empty"));
  336. return;
  337. }
  338. Status replStatus = FileSystem::CheckValidReplication(replication);
  339. if (!replStatus.ok()) {
  340. handler(replStatus);
  341. return;
  342. }
  343. nn_.SetReplication(path, replication, handler);
  344. }
  345. void FileSystemImpl::SetTimes(const std::string & path, uint64_t mtime, uint64_t atime,
  346. std::function<void(const Status &)> handler) {
  347. LOG_DEBUG(kFileSystem,
  348. << "FileSystemImpl::SetTimes(" << FMT_THIS_ADDR << ", path=" << path <<
  349. ", mtime=" << mtime << ", atime=" << atime << ") called");
  350. if (path.empty()) {
  351. handler(Status::InvalidArgument("SetTimes: argument 'path' cannot be empty"));
  352. return;
  353. }
  354. nn_.SetTimes(path, mtime, atime, handler);
  355. }
  356. void FileSystemImpl::GetFileInfo(
  357. const std::string &path,
  358. const std::function<void(const Status &, const StatInfo &)> &handler) {
  359. LOG_DEBUG(kFileSystem, << "FileSystemImpl::GetFileInfo("
  360. << FMT_THIS_ADDR << ", path="
  361. << path << ") called");
  362. nn_.GetFileInfo(path, handler);
  363. }
  364. void FileSystemImpl::GetFsStats(
  365. const std::function<void(const Status &, const FsInfo &)> &handler) {
  366. LOG_DEBUG(kFileSystem,
  367. << "FileSystemImpl::GetFsStats(" << FMT_THIS_ADDR << ") called");
  368. nn_.GetFsStats(handler);
  369. }
  370. /**
  371. * Helper function for recursive GetListing calls.
  372. *
  373. * Some compilers don't like recursive lambdas, so we make the lambda call a
  374. * method, which in turn creates a lambda calling itself.
  375. */
  376. void FileSystemImpl::GetListingShim(const Status &stat, const std::vector<StatInfo> & stat_infos, bool has_more,
  377. std::string path, const std::function<bool(const Status &, const std::vector<StatInfo> &, bool)> &handler) {
  378. bool has_next = !stat_infos.empty();
  379. bool get_more = handler(stat, stat_infos, has_more && has_next);
  380. if (get_more && has_more && has_next ) {
  381. auto callback = [this, path, handler](const Status &stat, const std::vector<StatInfo> & stat_infos, bool has_more) {
  382. GetListingShim(stat, stat_infos, has_more, path, handler);
  383. };
  384. std::string last = stat_infos.back().path;
  385. nn_.GetListing(path, callback, last);
  386. }
  387. }
  388. void FileSystemImpl::GetListing(
  389. const std::string &path,
  390. const std::function<bool(const Status &, const std::vector<StatInfo> &, bool)> &handler) {
  391. LOG_DEBUG(kFileSystem, << "FileSystemImpl::GetListing("
  392. << FMT_THIS_ADDR << ", path="
  393. << path << ") called");
  394. // Caputure the state and push it into the shim
  395. auto callback = [this, path, handler](const Status &stat, const std::vector<StatInfo> & stat_infos, bool has_more) {
  396. GetListingShim(stat, stat_infos, has_more, path, handler);
  397. };
  398. nn_.GetListing(path, callback);
  399. }
  400. void FileSystemImpl::Mkdirs(const std::string & path, uint16_t permissions, bool createparent,
  401. std::function<void(const Status &)> handler) {
  402. LOG_DEBUG(kFileSystem,
  403. << "FileSystemImpl::Mkdirs(" << FMT_THIS_ADDR << ", path=" << path <<
  404. ", permissions=" << permissions << ", createparent=" << createparent << ") called");
  405. if (path.empty()) {
  406. handler(Status::InvalidArgument("Mkdirs: argument 'path' cannot be empty"));
  407. return;
  408. }
  409. Status permStatus = FileSystem::CheckValidPermissionMask(permissions);
  410. if (!permStatus.ok()) {
  411. handler(permStatus);
  412. return;
  413. }
  414. nn_.Mkdirs(path, permissions, createparent, handler);
  415. }
  416. void FileSystemImpl::Delete(const std::string &path, bool recursive,
  417. const std::function<void(const Status &)> &handler) {
  418. LOG_DEBUG(kFileSystem,
  419. << "FileSystemImpl::Delete(" << FMT_THIS_ADDR << ", path=" << path << ", recursive=" << recursive << ") called");
  420. if (path.empty()) {
  421. handler(Status::InvalidArgument("Delete: argument 'path' cannot be empty"));
  422. return;
  423. }
  424. nn_.Delete(path, recursive, handler);
  425. }
  426. void FileSystemImpl::Rename(const std::string &oldPath, const std::string &newPath,
  427. const std::function<void(const Status &)> &handler) {
  428. LOG_DEBUG(kFileSystem,
  429. << "FileSystemImpl::Rename(" << FMT_THIS_ADDR << ", oldPath=" << oldPath << ", newPath=" << newPath << ") called");
  430. if (oldPath.empty()) {
  431. handler(Status::InvalidArgument("Rename: argument 'oldPath' cannot be empty"));
  432. return;
  433. }
  434. if (newPath.empty()) {
  435. handler(Status::InvalidArgument("Rename: argument 'newPath' cannot be empty"));
  436. return;
  437. }
  438. nn_.Rename(oldPath, newPath, handler);
  439. }
  440. void FileSystemImpl::SetPermission(const std::string & path,
  441. uint16_t permissions, const std::function<void(const Status &)> &handler) {
  442. LOG_DEBUG(kFileSystem,
  443. << "FileSystemImpl::SetPermission(" << FMT_THIS_ADDR << ", path=" << path << ", permissions=" << permissions << ") called");
  444. if (path.empty()) {
  445. handler(Status::InvalidArgument("SetPermission: argument 'path' cannot be empty"));
  446. return;
  447. }
  448. Status permStatus = FileSystem::CheckValidPermissionMask(permissions);
  449. if (!permStatus.ok()) {
  450. handler(permStatus);
  451. return;
  452. }
  453. nn_.SetPermission(path, permissions, handler);
  454. }
  455. void FileSystemImpl::SetOwner(const std::string & path, const std::string & username,
  456. const std::string & groupname, const std::function<void(const Status &)> &handler) {
  457. LOG_DEBUG(kFileSystem,
  458. << "FileSystemImpl::SetOwner(" << FMT_THIS_ADDR << ", path=" << path << ", username=" << username << ", groupname=" << groupname << ") called");
  459. if (path.empty()) {
  460. handler(Status::InvalidArgument("SetOwner: argument 'path' cannot be empty"));
  461. return;
  462. }
  463. nn_.SetOwner(path, username, groupname, handler);
  464. }
  465. /**
  466. * Helper function for recursive Find calls.
  467. *
  468. * Some compilers don't like recursive lambdas, so we make the lambda call a
  469. * method, which in turn creates a lambda calling itself.
  470. *
  471. * ***High-level explanation***
  472. *
  473. * Since we are allowing to use wild cards in both path and name, we start by expanding the path first.
  474. * Boolean search_path is set to true when we search for the path and false when we search for the name.
  475. * When we search for the path we break the given path pattern into sub-directories. Starting from the
  476. * first sub-directory we list them one-by-one and recursively continue into directories that matched the
  477. * path pattern at the current depth. Directories that are large will be requested to continue sending
  478. * the results. We keep track of the current depth within the path pattern in the 'depth' variable.
  479. * This continues recursively until the depth reaches the end of the path. Next that we start matching
  480. * the name pattern. All directories that we find we recurse now, and all names that match the given name
  481. * pattern are being stored in outputs and later sent back to the user.
  482. */
  483. void FileSystemImpl::FindShim(const Status &stat, const std::vector<StatInfo> & stat_infos, bool directory_has_more,
  484. std::shared_ptr<FindOperationalState> operational_state, std::shared_ptr<FindSharedState> shared_state) {
  485. //We buffer the outputs then send them back at the end
  486. std::vector<StatInfo> outputs;
  487. //Return on error
  488. if(!stat.ok()){
  489. std::lock_guard<std::mutex> find_lock(shared_state->lock);
  490. //We send true becuase we do not want the user code to exit before all our requests finished
  491. shared_state->handler(stat, outputs, true);
  492. shared_state->aborted = true;
  493. }
  494. if(!shared_state->aborted){
  495. //User did not abort the operation
  496. if (directory_has_more) {
  497. //Directory is large and has more results
  498. //We launch another async call to get more results
  499. shared_state->outstanding_requests++;
  500. auto callback = [this, operational_state, shared_state](const Status &stat, const std::vector<StatInfo> & stat_infos, bool has_more) {
  501. FindShim(stat, stat_infos, has_more, operational_state, shared_state);
  502. };
  503. std::string last = stat_infos.back().path;
  504. nn_.GetListing(operational_state->path, callback, last);
  505. }
  506. if(operational_state->search_path && operational_state->depth < shared_state->dirs.size() - 1){
  507. //We are searching for the path and did not reach the end of the path yet
  508. for (StatInfo const& si : stat_infos) {
  509. //If we are at the last depth and it matches both path and name, we need to output it.
  510. if (operational_state->depth == shared_state->dirs.size() - 2
  511. && !fnmatch(shared_state->dirs[operational_state->depth + 1].c_str(), si.path.c_str(), 0)
  512. && !fnmatch(shared_state->name.c_str(), si.path.c_str(), 0)) {
  513. outputs.push_back(si);
  514. }
  515. //Skip if not directory
  516. if(si.file_type != StatInfo::IS_DIR) {
  517. continue;
  518. }
  519. //Checking for a match with the path at the current depth
  520. if(!fnmatch(shared_state->dirs[operational_state->depth + 1].c_str(), si.path.c_str(), 0)){
  521. //Launch a new requests for every matched directory
  522. shared_state->outstanding_requests++;
  523. auto callback = [this, si, operational_state, shared_state](const Status &stat, const std::vector<StatInfo> & stat_infos, bool has_more) {
  524. std::shared_ptr<FindOperationalState> new_current_state = std::make_shared<FindOperationalState>(si.full_path, operational_state->depth + 1, true); //true because searching for the path
  525. FindShim(stat, stat_infos, has_more, new_current_state, shared_state);
  526. };
  527. nn_.GetListing(si.full_path, callback);
  528. }
  529. }
  530. }
  531. else if(shared_state->maxdepth > operational_state->depth - shared_state->dirs.size() + 1){
  532. //We are searching for the name now and maxdepth has not been reached
  533. for (StatInfo const& si : stat_infos) {
  534. //Launch a new request for every directory
  535. if(si.file_type == StatInfo::IS_DIR) {
  536. shared_state->outstanding_requests++;
  537. auto callback = [this, si, operational_state, shared_state](const Status &stat, const std::vector<StatInfo> & stat_infos, bool has_more) {
  538. std::shared_ptr<FindOperationalState> new_current_state = std::make_shared<FindOperationalState>(si.full_path, operational_state->depth + 1, false); //false because searching for the name
  539. FindShim(stat, stat_infos, has_more, new_current_state, shared_state);
  540. };
  541. nn_.GetListing(si.full_path, callback);
  542. }
  543. //All names that match the specified name are saved to outputs
  544. if(!fnmatch(shared_state->name.c_str(), si.path.c_str(), 0)){
  545. outputs.push_back(si);
  546. }
  547. }
  548. }
  549. }
  550. //This section needs a lock to make sure we return the final chunk only once
  551. //and no results are sent after aborted is set
  552. std::lock_guard<std::mutex> find_lock(shared_state->lock);
  553. //Decrement the counter once since we are done with this chunk
  554. shared_state->outstanding_requests--;
  555. if(shared_state->outstanding_requests == 0){
  556. //Send the outputs back to the user and notify that this is the final chunk
  557. shared_state->handler(stat, outputs, false);
  558. } else {
  559. //There will be more results and we are not aborting
  560. if (outputs.size() > 0 && !shared_state->aborted){
  561. //Send the outputs back to the user and notify that there is more
  562. bool user_wants_more = shared_state->handler(stat, outputs, true);
  563. if(!user_wants_more) {
  564. //Abort if user doesn't want more
  565. shared_state->aborted = true;
  566. }
  567. }
  568. }
  569. }
  570. void FileSystemImpl::Find(
  571. const std::string &path, const std::string &name, const uint32_t maxdepth,
  572. const std::function<bool(const Status &, const std::vector<StatInfo> &, bool)> &handler) {
  573. LOG_DEBUG(kFileSystem, << "FileSystemImpl::Find("
  574. << FMT_THIS_ADDR << ", path="
  575. << path << ", name="
  576. << name << ") called");
  577. //Populating the operational state, which includes:
  578. //current search path, depth within the path, and the indication that we are currently searching for a path (not name yet).
  579. std::shared_ptr<FindOperationalState> operational_state = std::make_shared<FindOperationalState>(path, 0, true);
  580. //Populating the shared state, which includes:
  581. //vector of sub-directories constructed from path, name to search, handler to use for result returning, outstanding_requests counter, and aborted flag.
  582. std::shared_ptr<FindSharedState> shared_state = std::make_shared<FindSharedState>(path, name, maxdepth, handler, 1, false);
  583. auto callback = [this, operational_state, shared_state](const Status &stat, const std::vector<StatInfo> & stat_infos, bool directory_has_more) {
  584. FindShim(stat, stat_infos, directory_has_more, operational_state, shared_state);
  585. };
  586. nn_.GetListing("/", callback);
  587. }
  588. void FileSystemImpl::CreateSnapshot(const std::string &path,
  589. const std::string &name,
  590. const std::function<void(const Status &)> &handler) {
  591. LOG_DEBUG(kFileSystem,
  592. << "FileSystemImpl::CreateSnapshot(" << FMT_THIS_ADDR << ", path=" << path << ", name=" << name << ") called");
  593. if (path.empty()) {
  594. handler(Status::InvalidArgument("CreateSnapshot: argument 'path' cannot be empty"));
  595. return;
  596. }
  597. nn_.CreateSnapshot(path, name, handler);
  598. }
  599. void FileSystemImpl::DeleteSnapshot(const std::string &path,
  600. const std::string &name,
  601. const std::function<void(const Status &)> &handler) {
  602. LOG_DEBUG(kFileSystem,
  603. << "FileSystemImpl::DeleteSnapshot(" << FMT_THIS_ADDR << ", path=" << path << ", name=" << name << ") called");
  604. if (path.empty()) {
  605. handler(Status::InvalidArgument("DeleteSnapshot: argument 'path' cannot be empty"));
  606. return;
  607. }
  608. if (name.empty()) {
  609. handler(Status::InvalidArgument("DeleteSnapshot: argument 'name' cannot be empty"));
  610. return;
  611. }
  612. nn_.DeleteSnapshot(path, name, handler);
  613. }
  614. void FileSystemImpl::AllowSnapshot(const std::string &path,
  615. const std::function<void(const Status &)> &handler) {
  616. LOG_DEBUG(kFileSystem,
  617. << "FileSystemImpl::AllowSnapshot(" << FMT_THIS_ADDR << ", path=" << path << ") called");
  618. if (path.empty()) {
  619. handler(Status::InvalidArgument("AllowSnapshot: argument 'path' cannot be empty"));
  620. return;
  621. }
  622. nn_.AllowSnapshot(path, handler);
  623. }
  624. void FileSystemImpl::DisallowSnapshot(const std::string &path,
  625. const std::function<void(const Status &)> &handler) {
  626. LOG_DEBUG(kFileSystem,
  627. << "FileSystemImpl::DisallowSnapshot(" << FMT_THIS_ADDR << ", path=" << path << ") called");
  628. if (path.empty()) {
  629. handler(Status::InvalidArgument("DisallowSnapshot: argument 'path' cannot be empty"));
  630. return;
  631. }
  632. nn_.DisallowSnapshot(path, handler);
  633. }
  634. void FileSystemImpl::SetFsEventCallback(fs_event_callback callback) {
  635. if (event_handlers_) {
  636. event_handlers_->set_fs_callback(callback);
  637. nn_.SetFsEventCallback(callback);
  638. }
  639. }
  640. std::shared_ptr<LibhdfsEvents> FileSystemImpl::get_event_handlers() {
  641. return event_handlers_;
  642. }
  643. Options FileSystemImpl::get_options() {
  644. return options_;
  645. }
  646. }