find.cc 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. /*
  2. Licensed to the Apache Software Foundation (ASF) under one
  3. or more contributor license agreements. See the NOTICE file
  4. distributed with this work for additional information
  5. regarding copyright ownership. The ASF licenses this file
  6. to you under the Apache License, Version 2.0 (the
  7. "License"); you may not use this file except in compliance
  8. with the License. You may obtain a copy of the License at
  9. http://www.apache.org/licenses/LICENSE-2.0
  10. Unless required by applicable law or agreed to in writing,
  11. software distributed under the License is distributed on an
  12. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  13. KIND, either express or implied. See the License for the
  14. specific language governing permissions and limitations
  15. under the License.
  16. */
  17. /**
  18. * A parallel find tool example.
  19. *
  20. * Finds all files matching the specified name recursively starting from the
  21. * specified directory and prints their filepaths. Works either synchronously
  22. * or asynchronously.
  23. *
  24. * Usage: find /<path-to-file> <file-name> <use_async>
  25. *
  26. * Example: find /dir?/tree* some?file*name 1
  27. *
  28. * @param path-to-file Absolute path at which to begin search, can have wild
  29. * cards and must be non-blank
  30. * @param file-name Name to find, can have wild cards and must be non-blank
  31. * @param use_async If set to 1, results are printed asynchronously as
  32. * they arrive. If set to 0, results are printed in one
  33. * big chunk once they all become available.
  34. *
  35. **/
#include "hdfspp/hdfspp.h"
#include <google/protobuf/stubs/common.h>
#include <cstdlib>
#include <future>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include "tools_common.h"
  40. void SyncFind(std::shared_ptr<hdfs::FileSystem> fs, const std::string &path, const std::string &name){
  41. std::vector<hdfs::StatInfo> results;
  42. //Synchronous call to Find
  43. hdfs::Status stat = fs->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), &results);
  44. if (!stat.ok()) {
  45. std::cerr << "Error: " << stat.ToString() << std::endl;
  46. }
  47. if(results.empty()){
  48. std::cout << "Nothing Found" << std::endl;
  49. } else {
  50. //Printing out the results
  51. for (hdfs::StatInfo const& si : results) {
  52. std::cout << si.full_path << std::endl;
  53. }
  54. }
  55. }
  56. void AsyncFind(std::shared_ptr<hdfs::FileSystem> fs, const std::string &path, const std::string &name){
  57. std::promise<void> promise;
  58. std::future<void> future(promise.get_future());
  59. bool something_found = false;
  60. hdfs::Status status = hdfs::Status::OK();
  61. /**
  62. * Keep requesting more until we get the entire listing. Set the promise
  63. * when we have the entire listing to stop.
  64. *
  65. * Find guarantees that the handler will only be called once at a time,
  66. * so we do not need any locking here
  67. */
  68. auto handler = [&promise, &status, &something_found]
  69. (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
  70. //Print result chunks as they arrive
  71. if(!si.empty()) {
  72. something_found = true;
  73. for (hdfs::StatInfo const& s : si) {
  74. std::cout << s.full_path << std::endl;
  75. }
  76. }
  77. if(!s.ok() && status.ok()){
  78. //We make sure we set 'status' only on the first error.
  79. status = s;
  80. }
  81. if (!has_more_results) {
  82. promise.set_value(); //set promise
  83. return false; //request stop sending results
  84. }
  85. return true; //request more results
  86. };
  87. //Asynchronous call to Find
  88. fs->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), handler);
  89. //block until promise is set
  90. future.get();
  91. if(!status.ok()) {
  92. std::cerr << "Error: " << status.ToString() << std::endl;
  93. }
  94. if(!something_found){
  95. std::cout << "Nothing Found" << std::endl;
  96. }
  97. }
  98. int main(int argc, char *argv[]) {
  99. if (argc != 4) {
  100. std::cerr << "usage: find /<path-to-file> <file-name> <use_async>" << std::endl;
  101. exit(EXIT_FAILURE);
  102. }
  103. std::string path = argv[1];
  104. std::string name = argv[2];
  105. bool use_async = (std::stoi(argv[3]) != 0);
  106. //Building a URI object from the given uri path
  107. hdfs::URI uri = hdfs::parse_path_or_exit(path);
  108. std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
  109. if (!fs) {
  110. std::cerr << "Could not connect the file system. " << std::endl;
  111. exit(EXIT_FAILURE);
  112. }
  113. if (use_async){
  114. //Example of Async find
  115. AsyncFind(fs, path, name);
  116. } else {
  117. //Example of Sync find
  118. SyncFind(fs, path, name);
  119. }
  120. // Clean up static data and prevent valgrind memory leaks
  121. google::protobuf::ShutdownProtobufLibrary();
  122. return 0;
  123. }