hdfs_setrep.cc 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. /*
  2. Licensed to the Apache Software Foundation (ASF) under one
  3. or more contributor license agreements. See the NOTICE file
  4. distributed with this work for additional information
  5. regarding copyright ownership. The ASF licenses this file
  6. to you under the Apache License, Version 2.0 (the
  7. "License"); you may not use this file except in compliance
  8. with the License. You may obtain a copy of the License at
  9. http://www.apache.org/licenses/LICENSE-2.0
  10. Unless required by applicable law or agreed to in writing,
  11. software distributed under the License is distributed on an
  12. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  13. KIND, either express or implied. See the License for the
  14. specific language governing permissions and limitations
  15. under the License.
  16. */
  #include <unistd.h>

  #include <exception>
  #include <future>
  #include <limits>
  #include <string>

  #include <google/protobuf/stubs/common.h>

  #include "tools_common.h"
  // Writes the hdfs_setrep help text (synopsis, description, options and
  // examples) to standard output and flushes the stream.
  void usage(){
    // One string literal per output line; the compiler concatenates them into
    // a single message so the stream is written to only once.
    static const char *const kHelpText =
        "Usage: hdfs_setrep [OPTION] NUM_REPLICAS PATH\n"
        "\n"
        "Changes the replication factor of a file at PATH. If PATH is a directory then the command\n"
        "recursively changes the replication factor of all files under the directory tree rooted at PATH.\n"
        "\n"
        " -h display this help and exit\n"
        "\n"
        "Examples:\n"
        "hdfs_setrep 5 hdfs://localhost.localdomain:8020/dir/file\n"
        "hdfs_setrep 3 /dir1/dir2\n";

    std::cout << kHelpText << std::flush;
  }
  34. struct SetReplicationState {
  35. const uint16_t replication;
  36. const std::function<void(const hdfs::Status &)> handler;
  37. //The request counter is incremented once every time SetReplication async call is made
  38. uint64_t request_counter;
  39. //This boolean will be set when find returns the last result
  40. bool find_is_done;
  41. //Final status to be returned
  42. hdfs::Status status;
  43. //Shared variables will need protection with a lock
  44. std::mutex lock;
  45. SetReplicationState(const uint16_t replication_, const std::function<void(const hdfs::Status &)> & handler_,
  46. uint64_t request_counter_, bool find_is_done_)
  47. : replication(replication_),
  48. handler(handler_),
  49. request_counter(request_counter_),
  50. find_is_done(find_is_done_),
  51. status(),
  52. lock() {
  53. }
  54. };
  55. int main(int argc, char *argv[]) {
  56. //We should have 3 or 4 parameters
  57. if (argc < 3) {
  58. usage();
  59. exit(EXIT_FAILURE);
  60. }
  61. int input;
  62. //Using GetOpt to read in the values
  63. opterr = 0;
  64. while ((input = getopt(argc, argv, "h")) != -1) {
  65. switch (input)
  66. {
  67. case 'h':
  68. usage();
  69. exit(EXIT_SUCCESS);
  70. case '?':
  71. if (isprint(optopt))
  72. std::cerr << "Unknown option `-" << (char) optopt << "'." << std::endl;
  73. else
  74. std::cerr << "Unknown option character `" << (char) optopt << "'." << std::endl;
  75. usage();
  76. exit(EXIT_FAILURE);
  77. default:
  78. exit(EXIT_FAILURE);
  79. }
  80. }
  81. std::string repl = argv[optind];
  82. std::string uri_path = argv[optind + 1];
  83. //Building a URI object from the given uri_path
  84. hdfs::URI uri = hdfs::parse_path_or_exit(uri_path);
  85. std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
  86. if (!fs) {
  87. std::cerr << "Could not connect the file system. " << std::endl;
  88. exit(EXIT_FAILURE);
  89. }
  90. /* wrap async FileSystem::SetReplication with promise to make it a blocking call */
  91. std::shared_ptr<std::promise<hdfs::Status>> promise = std::make_shared<std::promise<hdfs::Status>>();
  92. std::future<hdfs::Status> future(promise->get_future());
  93. auto handler = [promise](const hdfs::Status &s) {
  94. promise->set_value(s);
  95. };
  96. uint16_t replication = std::stoi(repl.c_str(), NULL, 8);
  97. //Allocating shared state, which includes:
  98. //replication to be set, handler to be called, request counter, and a boolean to keep track if find is done
  99. std::shared_ptr<SetReplicationState> state = std::make_shared<SetReplicationState>(replication, handler, 0, false);
  100. // Keep requesting more from Find until we process the entire listing. Call handler when Find is done and reques counter is 0.
  101. // Find guarantees that the handler will only be called once at a time so we do not need locking in handlerFind.
  102. auto handlerFind = [fs, state](const hdfs::Status &status_find, const std::vector<hdfs::StatInfo> & stat_infos, bool has_more_results) -> bool {
  103. //For each result returned by Find we call async SetReplication with the handler below.
  104. //SetReplication DOES NOT guarantee that the handler will only be called once at a time, so we DO need locking in handlerSetReplication.
  105. auto handlerSetReplication = [state](const hdfs::Status &status_set_replication) {
  106. std::lock_guard<std::mutex> guard(state->lock);
  107. //Decrement the counter once since we are done with this async call
  108. if (!status_set_replication.ok() && state->status.ok()){
  109. //We make sure we set state->status only on the first error.
  110. state->status = status_set_replication;
  111. }
  112. state->request_counter--;
  113. if(state->request_counter == 0 && state->find_is_done){
  114. state->handler(state->status); //exit
  115. }
  116. };
  117. if(!stat_infos.empty() && state->status.ok()) {
  118. for (hdfs::StatInfo const& s : stat_infos) {
  119. //Launch an asynchronous call to SetReplication for every returned file
  120. if(s.file_type == hdfs::StatInfo::IS_FILE){
  121. state->request_counter++;
  122. fs->SetReplication(s.full_path, state->replication, handlerSetReplication);
  123. }
  124. }
  125. }
  126. //Lock this section because handlerSetReplication might be accessing the same
  127. //shared variables simultaneously
  128. std::lock_guard<std::mutex> guard(state->lock);
  129. if (!status_find.ok() && state->status.ok()){
  130. //We make sure we set state->status only on the first error.
  131. state->status = status_find;
  132. }
  133. if(!has_more_results){
  134. state->find_is_done = true;
  135. if(state->request_counter == 0){
  136. state->handler(state->status); //exit
  137. }
  138. return false;
  139. }
  140. return true;
  141. };
  142. //Asynchronous call to Find
  143. fs->Find(uri.get_path(), "*", hdfs::FileSystem::GetDefaultFindMaxDepth(), handlerFind);
  144. /* block until promise is set */
  145. hdfs::Status status = future.get();
  146. if (!status.ok()) {
  147. std::cerr << "Error: " << status.ToString() << std::endl;
  148. exit(EXIT_FAILURE);
  149. }
  150. // Clean up static data and prevent valgrind memory leaks
  151. google::protobuf::ShutdownProtobufLibrary();
  152. return 0;
  153. }