hdfspp.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #ifndef LIBHDFSPP_HDFSPP_H_
  19. #define LIBHDFSPP_HDFSPP_H_
  20. #include "hdfspp/options.h"
  21. #include "hdfspp/status.h"
  22. #include "hdfspp/events.h"
  23. #include "hdfspp/block_location.h"
  24. #include "hdfspp/statinfo.h"
  25. #include "hdfspp/fsinfo.h"
  26. #include <functional>
  27. #include <memory>
  28. #include <set>
  29. #include <iostream>
  30. namespace hdfs {
  /**
   * IoService owns a queue of asynchronous tasks. Every libhdfs++ operation
   * is filed against one particular IoService.
   *
   * After an operation has been queued into an IoService, the IoService runs
   * the callback handler associated with that operation. The IoService must
   * be stopped before the objects that filed the operations are destructed.
   *
   * Implementation note: an IoService wraps the ::asio::io_service objects;
   * consult the asio documentation for further details.
   **/
  class IoService {
   public:
    /**
     * Factory for IoService instances.
     *
     * NOTE(review): only the declaration is visible here; ownership of the
     * returned pointer is not stated in this header -- confirm with the
     * implementation.
     **/
    static IoService *New();

    /**
     * Run the asynchronous tasks associated with this IoService.
     **/
    virtual void Run() = 0;

    /**
     * Stop running asynchronous tasks associated with this IoService.
     **/
    virtual void Stop() = 0;

    /**
     * Virtual destructor (declared here without an inline body).
     **/
    virtual ~IoService();
  };
  /**
   * A node exclusion rule provides a simple way of testing whether the
   * client should attempt to connect to a node, based on the node's UUID.
   * The FileSystem and FileHandle use the BadDataNodeTracker by default.
   * AsyncPreadSome takes an optional NodeExclusionRule that will override
   * the BadDataNodeTracker.
   **/
  class NodeExclusionRule {
   public:
    /**
     * Defaulted virtual destructor so that implementations can be destroyed
     * through a NodeExclusionRule pointer.
     **/
    virtual ~NodeExclusionRule() = default;

    /**
     * Tests a node against this rule.
     *
     * @param node_uuid UUID of the node being tested
     * @return NOTE(review): presumably true when the node should be excluded
     *         (i.e. the client should not connect to it) -- confirm against
     *         the implementations.
     **/
    virtual bool IsBadNode(const std::string &node_uuid) = 0;
  };
  69. /**
  70. * Applications opens a FileHandle to read files in HDFS.
  71. **/
  72. class FileHandle {
  73. public:
  74. /**
  75. * Read data from a specific position. The current implementation
  76. * stops at the block boundary.
  77. *
  78. * @param buf the pointer to the buffer
  79. * @param nbyte the size of the buffer
  80. * @param offset the offset the file
  81. *
  82. * The handler returns the datanode that serves the block and the number of
  83. * bytes has read.
  84. **/
  85. virtual void
  86. PositionRead(void *buf, size_t nbyte, uint64_t offset,
  87. const std::function<void(const Status &, size_t)> &handler) = 0;
  88. virtual Status PositionRead(void *buf, size_t *nbyte, off_t offset) = 0;
  89. virtual Status Read(void *buf, size_t *nbyte) = 0;
  90. virtual Status Seek(off_t *offset, std::ios_base::seekdir whence) = 0;
  91. /**
  92. * Cancel outstanding file operations. This is not reversable, once called
  93. * the handle should be disposed of.
  94. **/
  95. virtual void CancelOperations(void) = 0;
  96. /**
  97. * Determine if a datanode should be excluded from future operations
  98. * based on the return Status.
  99. *
  100. * @param status the Status object returned by InputStream::PositionRead
  101. * @return true if the status indicates a failure that is not recoverable
  102. * by the client and false otherwise.
  103. **/
  104. static bool ShouldExclude(const Status &status);
  105. /**
  106. * Sets an event callback for file-level event notifications (such as connecting
  107. * to the DataNode, communications errors, etc.)
  108. *
  109. * Many events are defined in hdfspp/events.h; the consumer should also expect
  110. * to be called with many private events, which can be ignored.
  111. *
  112. * @param callback The function to call when a reporting event occurs.
  113. */
  114. virtual void SetFileEventCallback(file_event_callback callback) = 0;
  115. virtual ~FileHandle();
  116. };
  117. /**
  118. * FileSystem implements APIs to interact with HDFS.
  119. **/
  120. class FileSystem {
  121. public:
  122. /**
  123. * Create a new instance of the FileSystem object. The call
  124. * initializes the RPC connections to the NameNode and returns an
  125. * FileSystem object.
  126. *
  127. * If user_name is blank, the current user will be used for a default.
  128. **/
  129. static FileSystem * New(
  130. IoService *&io_service, const std::string &user_name, const Options &options);
  131. virtual void Connect(const std::string &server,
  132. const std::string &service,
  133. const std::function<void(const Status &, FileSystem *)> &handler) = 0;
  134. /* Synchronous call of Connect */
  135. virtual Status Connect(const std::string &server,
  136. const std::string &service) = 0;
  137. /**
  138. * Connects to the hdfs instance indicated by the defaultFs value of the
  139. * Options structure.
  140. *
  141. * If no defaultFs is defined, returns an error.
  142. */
  143. virtual void ConnectToDefaultFs(
  144. const std::function<void(const Status &, FileSystem *)> &handler) = 0;
  145. virtual Status ConnectToDefaultFs() = 0;
  146. /**
  147. * Open a file on HDFS. The call issues an RPC to the NameNode to
  148. * gather the locations of all blocks in the file and to return a
  149. * new instance of the @ref InputStream object.
  150. **/
  151. virtual void
  152. Open(const std::string &path,
  153. const std::function<void(const Status &, FileHandle *)> &handler) = 0;
  154. virtual Status Open(const std::string &path, FileHandle **handle) = 0;
  155. /**
  156. * Returns metadata about the file if the file/directory exists.
  157. **/
  158. virtual void
  159. GetFileInfo(const std::string &path,
  160. const std::function<void(const Status &, const StatInfo &)> &handler) = 0;
  161. virtual Status GetFileInfo(const std::string &path, StatInfo & stat_info) = 0;
  162. /**
  163. * Retrieves the file system information as a whole, such as the total raw size of all files in the filesystem
  164. * and the raw capacity of the filesystem
  165. *
  166. * @param FsInfo struct to be populated by GetFsStats
  167. **/
  168. virtual void GetFsStats(
  169. const std::function<void(const Status &, const FsInfo &)> &handler) = 0;
  170. virtual Status GetFsStats(FsInfo & fs_info) = 0;
  171. /**
  172. * Retrieves the files contained in a directory and returns the metadata
  173. * for each of them.
  174. *
  175. * The asynchronous method will return batches of files; the consumer must
  176. * return true if they want more files to be delivered. The final bool
  177. * parameter in the callback will be set to true if this is the final
  178. * batch of files.
  179. *
  180. * The synchronous method will return all files in the directory.
  181. *
  182. * Path must be an absolute path in the hdfs filesytem (e.g. /tmp/foo/bar)
  183. **/
  184. virtual void
  185. GetListing(const std::string &path,
  186. const std::function<bool(const Status &, std::shared_ptr<std::vector<StatInfo>> &, bool)> &handler) = 0;
  187. virtual Status GetListing(const std::string &path,
  188. std::shared_ptr<std::vector<StatInfo>> & stat_infos) = 0;
  189. /**
  190. * Returns the locations of all known blocks for the indicated file, or an error
  191. * if the information clould not be found
  192. */
  193. virtual void GetBlockLocations(const std::string & path,
  194. const std::function<void(const Status &, std::shared_ptr<FileBlockLocation> locations)> ) = 0;
  195. virtual Status GetBlockLocations(const std::string & path,
  196. std::shared_ptr<FileBlockLocation> * locations) = 0;
  197. /*****************************************************************************
  198. * FILE SYSTEM SNAPSHOT FUNCTIONS
  199. ****************************************************************************/
  200. /**
  201. * Creates a snapshot of a snapshottable directory specified by path
  202. *
  203. * @param path Path to the directory to be snapshotted (must be non-empty)
  204. * @param name Name to be given to the created snapshot (may be empty)
  205. **/
  206. virtual void CreateSnapshot(const std::string &path, const std::string &name,
  207. const std::function<void(const Status &)> &handler) = 0;
  208. virtual Status CreateSnapshot(const std::string &path,
  209. const std::string &name) = 0;
  210. /**
  211. * Deletes the directory snapshot specified by path and name
  212. *
  213. * @param path Path to the snapshotted directory (must be non-empty)
  214. * @param name Name of the snapshot to be deleted (must be non-empty)
  215. **/
  216. virtual void DeleteSnapshot(const std::string &path, const std::string &name,
  217. const std::function<void(const Status &)> &handler) = 0;
  218. virtual Status DeleteSnapshot(const std::string &path,
  219. const std::string &name) = 0;
  220. /**
  221. * Allows snapshots to be made on the specified directory
  222. *
  223. * @param path Path to the directory to be made snapshottable (must be non-empty)
  224. **/
  225. virtual void AllowSnapshot(const std::string &path,
  226. const std::function<void(const Status &)> &handler) = 0;
  227. virtual Status AllowSnapshot(const std::string &path) = 0;
  228. /**
  229. * Disallows snapshots to be made on the specified directory
  230. *
  231. * @param path Path to the directory to be made non-snapshottable (must be non-empty)
  232. **/
  233. virtual void DisallowSnapshot(const std::string &path,
  234. const std::function<void(const Status &)> &handler) = 0;
  235. virtual Status DisallowSnapshot(const std::string &path) = 0;
  236. /**
  237. * Note that it is an error to destroy the filesystem from within a filesystem
  238. * callback. It will lead to a deadlock and the termination of the process.
  239. */
  240. virtual ~FileSystem() {};
  241. /**
  242. * Sets an event callback for fs-level event notifications (such as connecting
  243. * to the NameNode, communications errors with the NN, etc.)
  244. *
  245. * Many events are defined in hdfspp/events.h; the consumer should also expect
  246. * to be called with many private events, which can be ignored.
  247. *
  248. * @param callback The function to call when a reporting event occurs.
  249. */
  250. virtual void SetFsEventCallback(fs_event_callback callback) = 0;
  251. };
  252. }
  253. #endif