1
0

FileSystem.java 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938
  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.fs;
  19. import java.io.*;
  20. import java.net.*;
  21. import java.util.*;
  22. import java.util.regex.Pattern;
  23. import org.apache.commons.logging.*;
  24. import org.apache.hadoop.dfs.*;
  25. import org.apache.hadoop.conf.*;
  26. import org.apache.hadoop.util.*;
  27. /****************************************************************
  28. * An abstract base class for a fairly generic filesystem. It
  29. * may be implemented as a distributed filesystem, or as a "local"
  30. * one that reflects the locally-connected disk. The local version
  31. * exists for small Hadoop instances and for testing.
  32. *
  33. * <p>
  34. *
  35. * All user code that may potentially use the Hadoop Distributed
  36. * File System should be written to use a FileSystem object. The
  37. * Hadoop DFS is a multi-machine system that appears as a single
  38. * disk. It's useful because of its fault tolerance and potentially
  39. * very large capacity.
  40. *
  41. * <p>
  42. * The local implementation is {@link LocalFileSystem} and distributed
  43. * implementation is {@link DistributedFileSystem}.
  44. * @author Mike Cafarella
  45. *****************************************************************/
  46. public abstract class FileSystem extends Configured {
  // Shared logger for file system code.
  // NOTE(review): the logger is registered under the DistributedFileSystem
  // name rather than this class's own name -- confirm this is intended.
  47. public static final Log LOG = LogFactory.getLog("org.apache.hadoop.dfs.DistributedFileSystem");
  48. // cache of FileSystem instances: URI scheme -> (URI authority -> instance)
  49. private static final Map<String,Map<String,FileSystem>> CACHE
  50. = new HashMap<String,Map<String,FileSystem>>();
  51. /**
  52. * Parse the cmd-line args, starting at i. Remove consumed args
  53. * from array. We expect param in the form:
  54. * '-local | -dfs <namenode:port>'
  55. */
  56. public static FileSystem parseArgs(String argv[], int i, Configuration conf) throws IOException {
  57. /**
  58. if (argv.length - i < 1) {
  59. throw new IOException("Must indicate filesystem type for DFS");
  60. }
  61. */
  62. int orig = i;
  63. FileSystem fs = null;
  64. String cmd = argv[i];
  65. if ("-dfs".equals(cmd)) {
  66. i++;
  67. InetSocketAddress addr = DataNode.createSocketAddr(argv[i++]);
  68. fs = new DistributedFileSystem(addr, conf);
  69. } else if ("-local".equals(cmd)) {
  70. i++;
  71. fs = FileSystem.getLocal(conf);
  72. } else {
  73. fs = get(conf); // using default
  74. LOG.info("No FS indicated, using default:"+fs.getName());
  75. }
  76. System.arraycopy(argv, i, argv, orig, argv.length - i);
  77. for (int j = argv.length - i; j < argv.length; j++) {
  78. argv[j] = null;
  79. }
  80. return fs;
  81. }
  82. /** Returns the configured filesystem implementation.*/
  83. public static FileSystem get(Configuration conf) throws IOException {
  84. return getNamed(conf.get("fs.default.name", "local"), conf);
  85. }
  86. /** Called after a new FileSystem instance is constructed.
  87. * @param name a uri whose authority section names the host, port, etc.
  88. * for this FileSystem
  89. * @param conf the configuration
  * @throws IOException declared so implementations can report a failure
  90. */
  91. public abstract void initialize(URI name, Configuration conf)
  92. throws IOException;
  93. /** Returns a URI whose scheme and authority identify this FileSystem. */
  94. public abstract URI getUri();
  95. /** @deprecated call {@link #getUri()} instead. */
  96. public abstract String getName();
  97. /** @deprecated call #get(URI,Configuration) instead. */
  98. public static FileSystem getNamed(String name, Configuration conf)
  99. throws IOException {
  100. // convert old-format name to new-format name
  101. if (name.equals("local")) { // "local" is now "file:///".
  102. name = "file:///";
  103. } else if (name.indexOf('/')==-1) { // unqualified is "hdfs://"
  104. name = "hdfs://"+name;
  105. }
  106. return get(URI.create(name), conf);
  107. }
  108. /**
  109. * Get the local file syste
  110. * @param conf the configuration to configure the file system with
  111. * @return a LocalFileSystem
  112. */
  113. public static LocalFileSystem getLocal(Configuration conf)
  114. throws IOException {
  115. return (LocalFileSystem)get(LocalFileSystem.NAME, conf);
  116. }
  117. /** Returns the FileSystem for this URI's scheme and authority. The scheme
  118. * of the URI determines a configuration property name,
  119. * <tt>fs.<i>scheme</i>.class</tt> whose value names the FileSystem class.
  120. * The entire URI is passed to the FileSystem instance's initialize method.
  121. */
  122. public static synchronized FileSystem get(URI uri, Configuration conf)
  123. throws IOException {
  124. String scheme = uri.getScheme();
  125. String authority = uri.getAuthority();
  126. if (scheme == null) { // no scheme: use default FS
  127. return get(conf);
  128. }
  129. Map<String,FileSystem> authorityToFs = CACHE.get(scheme);
  130. if (authorityToFs == null) {
  131. authorityToFs = new HashMap<String,FileSystem>();
  132. CACHE.put(scheme, authorityToFs);
  133. }
  134. FileSystem fs = authorityToFs.get(authority);
  135. if (fs == null) {
  136. Class fsClass = conf.getClass("fs."+scheme+".impl", null);
  137. if (fsClass == null) {
  138. throw new IOException("No FileSystem for scheme: " + scheme);
  139. }
  140. fs = (FileSystem)ReflectionUtils.newInstance(fsClass, conf);
  141. fs.initialize(uri, conf);
  142. authorityToFs.put(authority, fs);
  143. }
  144. return fs;
  145. }
  146. /** Make sure that a path specifies a FileSystem. */
  147. public Path makeQualified(Path path) {
  148. checkPath(path);
  149. if (!path.isAbsolute())
  150. path = new Path(getWorkingDirectory(), path);
  151. URI pathUri = path.toUri();
  152. URI fsUri = getUri();
  153. String scheme = pathUri.getScheme();
  154. String authority = pathUri.getAuthority();
  155. if (scheme != null &&
  156. (authority != null || fsUri.getAuthority() == null))
  157. return path;
  158. if (scheme == null) {
  159. scheme = fsUri.getScheme();
  160. }
  161. if (authority == null) {
  162. authority = fsUri.getAuthority();
  163. if (authority == null) {
  164. authority = "";
  165. }
  166. }
  167. return new Path(scheme+":"+"//"+authority + pathUri.getPath());
  168. }
  169. /** Return the name of the checksum file associated with a file.*/
  170. public static Path getChecksumFile(Path file) {
  171. return new Path(file.getParent(), "."+file.getName()+".crc");
  172. }
  173. /** Return the length of the checksum file given the size of the
  174. * actual file.
  175. **/
  176. public static long getChecksumFileLength(long fileSize, int bytesPerSum) {
  177. return FSDataOutputStream.getChecksumLength(fileSize, bytesPerSum);
  178. }
  179. /** Return true iff file is a checksum file name.*/
  180. public static boolean isChecksumFile(Path file) {
  181. String name = file.getName();
  182. return name.startsWith(".") && name.endsWith(".crc");
  183. }
  184. ///////////////////////////////////////////////////////////////
  185. // FileSystem
  186. ///////////////////////////////////////////////////////////////
  // Constructs an instance by passing null to the Configured constructor.
  // NOTE(review): presumably the real configuration is supplied later,
  // e.g. via initialize() -- confirm against Configured and the subclasses.
  187. protected FileSystem() {
  188. super(null);
  189. }
  190. /** Check that a Path belongs to this FileSystem. */
  191. protected void checkPath(Path path) {
  192. URI uri = path.toUri();
  193. if (uri.getScheme() == null) // fs is relative
  194. return;
  195. String thisAuthority = this.getUri().getAuthority();
  196. String thatAuthority = uri.getAuthority();
  197. if (!(this.getUri().getScheme().equals(uri.getScheme()) &&
  198. (thisAuthority == null && thatAuthority == null)
  199. || thisAuthority.equals(thatAuthority)))
  200. throw new IllegalArgumentException("Wrong FS: "+path+
  201. ", expected: "+this.getUri());
  202. }
  203. /**
  204. * Return a 2D array of size 1x1 or greater, containing hostnames
  205. * where portions of the given file can be found. For a nonexistent
  206. * file or regions, null will be returned.
  207. *
  208. * This call is most helpful with DFS, where it returns
  209. * hostnames of machines that contain the given file.
  210. *
  211. * The FileSystem will simply return an elt containing 'localhost'.
  * NOTE(review): the sentence above presumably describes the local
  * implementation; confirm against LocalFileSystem.
  *
  * @param f the file to look up
  * @param start presumably the byte offset where the region starts -- confirm
  * @param len presumably the length of the region in bytes -- confirm
  212. */
  213. public abstract String[][] getFileCacheHints(Path f, long start, long len) throws IOException;
  214. /**
  215. * Opens an FSDataInputStream at the indicated Path.
  216. * @param f the file name to open
  217. * @param bufferSize the size of the buffer to be used.
  218. */
  219. public FSDataInputStream open(Path f, int bufferSize) throws IOException {
  220. return new FSDataInputStream(this, f, bufferSize, getConf());
  221. }
  222. /**
  223. * Opens an FSDataInputStream at the indicated Path.
  224. * @param f the file to open
  225. */
  226. public FSDataInputStream open(Path f) throws IOException {
  227. return new FSDataInputStream(this, f, getConf());
  228. }
  229. /**
  230. * Opens an InputStream for the indicated Path, whether local
  231. * or via DFS.
  * @param f the file to open
  232. */
  233. public abstract FSInputStream openRaw(Path f) throws IOException;
  234. /**
  235. * Opens an FSDataOutputStream at the indicated Path.
  236. * Files are overwritten by default.
  237. */
  238. public FSDataOutputStream create(Path f) throws IOException {
  239. return create(f, true,
  240. getConf().getInt("io.file.buffer.size", 4096),
  241. getDefaultReplication(),
  242. getDefaultBlockSize());
  243. }
  244. /**
  245. * Create an FSDataOutputStream at the indicated Path with write-progress
  246. * reporting.
  247. * Files are overwritten by default.
  248. */
  249. public FSDataOutputStream create(Path f, Progressable progress) throws IOException {
  250. return create(f, true,
  251. getConf().getInt("io.file.buffer.size", 4096),
  252. getDefaultReplication(),
  253. getDefaultBlockSize(), progress);
  254. }
  255. /**
  256. * Opens an FSDataOutputStream at the indicated Path.
  257. * Files are overwritten by default.
  258. */
  259. public FSDataOutputStream create(Path f, short replication)
  260. throws IOException {
  261. return create(f, true,
  262. getConf().getInt("io.file.buffer.size", 4096),
  263. replication,
  264. getDefaultBlockSize());
  265. }
  266. /**
  267. * Opens an FSDataOutputStream at the indicated Path with write-progress
  268. * reporting.
  269. * Files are overwritten by default.
  270. */
  271. public FSDataOutputStream create(Path f, short replication, Progressable progress)
  272. throws IOException {
  273. return create(f, true,
  274. getConf().getInt("io.file.buffer.size", 4096),
  275. replication,
  276. getDefaultBlockSize(), progress);
  277. }
  278. /**
  279. * Opens an FSDataOutputStream at the indicated Path.
  280. * @param f the file name to open
  281. * @param overwrite if a file with this name already exists, then if true,
  282. * the file will be overwritten, and if false an error will be thrown.
  283. * @param bufferSize the size of the buffer to be used.
  284. */
  285. public FSDataOutputStream create( Path f,
  286. boolean overwrite,
  287. int bufferSize
  288. ) throws IOException {
  289. return create( f, overwrite, bufferSize,
  290. getDefaultReplication(),
  291. getDefaultBlockSize());
  292. }
  293. /**
  294. * Opens an FSDataOutputStream at the indicated Path with write-progress
  295. * reporting.
  296. * @param f the file name to open
  297. * @param overwrite if a file with this name already exists, then if true,
  298. * the file will be overwritten, and if false an error will be thrown.
  299. * @param bufferSize the size of the buffer to be used.
  300. */
  301. public FSDataOutputStream create( Path f,
  302. boolean overwrite,
  303. int bufferSize,
  304. Progressable progress
  305. ) throws IOException {
  306. return create( f, overwrite, bufferSize,
  307. getDefaultReplication(),
  308. getDefaultBlockSize(), progress);
  309. }
  310. /**
  311. * Opens an FSDataOutputStream at the indicated Path.
  312. * @param f the file name to open
  313. * @param overwrite if a file with this name already exists, then if true,
  314. * the file will be overwritten, and if false an error will be thrown.
  315. * @param bufferSize the size of the buffer to be used.
  316. * @param replication required block replication for the file.
  317. */
  318. public FSDataOutputStream create( Path f,
  319. boolean overwrite,
  320. int bufferSize,
  321. short replication,
  322. long blockSize
  323. ) throws IOException {
  324. return new FSDataOutputStream(this, f, overwrite, getConf(),
  325. bufferSize, replication, blockSize );
  326. }
  327. /**
  328. * Opens an FSDataOutputStream at the indicated Path with write-progress
  329. * reporting.
  330. * @param f the file name to open
  331. * @param overwrite if a file with this name already exists, then if true,
  332. * the file will be overwritten, and if false an error will be thrown.
  333. * @param bufferSize the size of the buffer to be used.
  334. * @param replication required block replication for the file.
  335. */
  336. public FSDataOutputStream create( Path f,
  337. boolean overwrite,
  338. int bufferSize,
  339. short replication,
  340. long blockSize,
  341. Progressable progress
  342. ) throws IOException {
  343. return new FSDataOutputStream(this, f, overwrite, getConf(),
  344. bufferSize, replication, blockSize, progress );
  345. }
  346. /** Opens an OutputStream at the indicated Path.
  347. * @param f the file name to open
  348. * @param overwrite if a file with this name already exists, then if true,
  349. * the file will be overwritten, and if false an error will be thrown.
  350. * @param replication required block replication for the file.
  * @param blockSize presumably the block size for the new file -- confirm
  351. */
  352. public abstract FSOutputStream createRaw(Path f, boolean overwrite,
  353. short replication,
  354. long blockSize)
  355. throws IOException;
  356. /** Opens an OutputStream at the indicated Path with write-progress
  357. * reporting.
  358. * @param f the file name to open
  359. * @param overwrite if a file with this name already exists, then if true,
  360. * the file will be overwritten, and if false an error will be thrown.
  361. * @param replication required block replication for the file.
  * @param blockSize presumably the block size for the new file -- confirm
  * @param progress the receiver of write-progress reports
  362. */
  363. public abstract FSOutputStream createRaw(Path f, boolean overwrite,
  364. short replication,
  365. long blockSize, Progressable progress)
  366. throws IOException;
  367. /**
  368. * Creates the given Path as a brand-new zero-length file. If
  369. * create fails, or if it already existed, return false.
  370. */
  371. public boolean createNewFile(Path f) throws IOException {
  372. if (exists(f)) {
  373. return false;
  374. } else {
  375. create(f,false,getConf().getInt("io.file.buffer.size", 4096)).close();
  376. return true;
  377. }
  378. }
  379. /**
  380. * Set replication for an existing file.
  381. *
  382. * @param src file name
  383. * @param replication new replication
  384. * @throws IOException
  385. * @return true if successful;
  386. * false if file does not exist or is a directory
  387. */
  388. public boolean setReplication(Path src, short replication) throws IOException {
  389. boolean value = setReplicationRaw(src, replication);
  390. if( ! value )
  391. return false;
  392. Path checkFile = getChecksumFile(src);
  393. if (exists(checkFile))
  394. setReplicationRaw(checkFile, replication);
  395. return true;
  396. }
  397. /**
  398. * Get the replication of a file.
  399. *
  400. * @param src file name
  401. * @return file replication
  402. * @throws IOException
  403. */
  404. public abstract short getReplication(Path src) throws IOException;
  405. /**
  406. * Set replication for an existing file, without touching its checksum file.
  407. *
  408. * @param src file name
  409. * @param replication new replication
  410. * @throws IOException
  411. * @return true if successful;
  412. * false if file does not exist or is a directory
  413. */
  414. public abstract boolean setReplicationRaw(Path src, short replication) throws IOException;
  415. /**
  416. * Renames Path src to Path dst. Can take place on local fs
  417. * or remote DFS.
  418. */
  419. public boolean rename(Path src, Path dst) throws IOException {
  420. if (isDirectory(src)) {
  421. return renameRaw(src, dst);
  422. } else {
  423. boolean value = renameRaw(src, dst);
  424. if (!value)
  425. return false;
  426. Path checkFile = getChecksumFile(src);
  427. if (exists(checkFile)) { //try to rename checksum
  428. if(isDirectory(dst)) {
  429. renameRaw(checkFile, dst);
  430. } else {
  431. renameRaw(checkFile, getChecksumFile(dst));
  432. }
  433. }
  434. return value;
  435. }
  436. }
  437. /**
  438. * Renames Path src to Path dst, without touching any checksum file.
  439. * Can take place on local fs or remote DFS.
  * @return true if the rename succeeded
  440. */
  441. public abstract boolean renameRaw(Path src, Path dst) throws IOException;
  442. /** Delete a file. */
  443. public boolean delete(Path f) throws IOException {
  444. if (isDirectory(f)) {
  445. return deleteRaw(f);
  446. } else {
  447. deleteRaw(getChecksumFile(f)); // try to delete checksum
  448. return deleteRaw(f);
  449. }
  450. }
  451. /**
  452. * Deletes Path, without touching any checksum file.
  453. */
  454. public abstract boolean deleteRaw(Path f) throws IOException;
  455. /** True iff the named path exists. */
  456. public abstract boolean exists(Path f) throws IOException;
  457. /** True iff the named path is a directory. */
  458. public abstract boolean isDirectory(Path f) throws IOException;
  459. /** True iff the named path is a regular file. */
  460. public boolean isFile(Path f) throws IOException {
  461. if (exists(f) && ! isDirectory(f)) {
  462. return true;
  463. } else {
  464. return false;
  465. }
  466. }
  467. /** Returns the number of bytes in a file. */
  468. public abstract long getLength(Path f) throws IOException;
  469. /** Return the number of bytes of the given path
  470. * If <i>f</i> is a file, return the size of the file;
  471. * If <i>f</i> is a directory, return the size of the directory tree
  472. */
  473. public long getContentLength(Path f) throws IOException {
  474. if (!isDirectory(f)) {
  475. // f is a file
  476. return getLength(f);
  477. }
  478. // f is a diretory
  479. Path[] contents = listPathsRaw(f);
  480. long size = 0;
  481. for(int i=0; i<contents.length; i++) {
  482. size += getContentLength(contents[i]);
  483. }
  484. return size;
  485. }
  486. final private static PathFilter DEFAULT_FILTER = new PathFilter() {
  487. public boolean accept(Path file) {
  488. return !isChecksumFile(file);
  489. }
  490. };
  491. /** List files in a directory. */
  492. public Path[] listPaths(Path f) throws IOException {
  493. return listPaths(f, DEFAULT_FILTER);
  494. }
  495. /** List files in a directory, with no filtering applied. */
  496. public abstract Path[] listPathsRaw(Path f) throws IOException;
  497. /** Filter raw files in a directory. */
  498. private void listPaths(ArrayList<Path> results, Path f, PathFilter filter)
  499. throws IOException {
  500. Path listing[] = listPathsRaw(f);
  501. if (listing != null) {
  502. for (int i = 0; i < listing.length; i++) {
  503. if (filter.accept(listing[i])) {
  504. results.add(listing[i]);
  505. }
  506. }
  507. }
  508. }
  509. /** Filter raw files in a directory. */
  510. public Path[] listPaths(Path f, PathFilter filter) throws IOException {
  511. ArrayList<Path> results = new ArrayList<Path>();
  512. listPaths(results, f, filter);
  513. return (Path[]) results.toArray(new Path[results.size()]);
  514. }
  515. /**
  516. * Filter raw files in a list directories using the default checksum filter.
  517. * @param files a list of paths
  518. * @return a list of files under the source paths
  519. * @exception IOException
  520. */
  521. public Path[] listPaths(Path[] files ) throws IOException {
  522. return listPaths( files, DEFAULT_FILTER );
  523. }
  524. /**
  525. * Filter raw files in a list directories using user-supplied path filter.
  526. * @param files a list of paths
  527. * @return a list of files under the source paths
  528. * @exception IOException
  529. */
  530. public Path[] listPaths(Path[] files, PathFilter filter)
  531. throws IOException {
  532. ArrayList<Path> results = new ArrayList<Path>();
  533. for(int i=0; i<files.length; i++) {
  534. listPaths(results, files[i], filter);
  535. }
  536. return (Path[]) results.toArray(new Path[results.size()]);
  537. }
  538. /**
  539. * <p>Return all the files that match filePattern and are not checksum
  540. * files. Results are sorted by their names.
  541. *
  542. * <p>
  543. * A filename pattern is composed of <i>regular</i> characters and
  544. * <i>special pattern matching</i> characters, which are:
  545. *
  546. * <dl>
  547. * <dd>
  548. * <dl>
  549. * <p>
  550. * <dt> <tt> ? </tt>
  551. * <dd> Matches any single character.
  552. *
  553. * <p>
  554. * <dt> <tt> * </tt>
  555. * <dd> Matches zero or more characters.
  556. *
  557. * <p>
  558. * <dt> <tt> [<i>abc</i>] </tt>
  559. * <dd> Matches a single character from character set
  560. * <tt>{<i>a,b,c</i>}</tt>.
  561. *
  562. * <p>
  563. * <dt> <tt> [<i>a</i>-<i>b</i>] </tt>
  564. * <dd> Matches a single character from the character range
  565. * <tt>{<i>a...b</i>}</tt>. Note that character <tt><i>a</i></tt> must be
  566. * lexicographically less than or equal to character <tt><i>b</i></tt>.
  567. *
  568. * <p>
  569. * <dt> <tt> [^<i>a</i>] </tt>
  570. * <dd> Matches a single character that is not from character set or range
  571. * <tt>{<i>a</i>}</tt>. Note that the <tt>^</tt> character must occur
  572. * immediately to the right of the opening bracket.
  573. *
  574. * <p>
  575. * <dt> <tt> \<i>c</i> </tt>
  576. * <dd> Removes (escapes) any special meaning of character <i>c</i>.
  577. *
  578. * </dl>
  579. * </dd>
  580. * </dl>
  581. *
  582. * @param filePattern a regular expression specifying file pattern
  583. * @return an array of paths that match the file pattern
  584. * @throws IOException
  585. */
  586. public Path[] globPaths(Path filePattern) throws IOException {
  587. return globPaths(filePattern, DEFAULT_FILTER);
  588. }
  589. /** glob all the file names that matches filePattern
  590. * and is accepted by filter.
  591. */
  592. public Path[] globPaths(Path filePattern, PathFilter filter)
  593. throws IOException {
  594. Path [] parents = new Path[1];
  595. int level = 0;
  596. String filename = filePattern.toUri().getPath();
  597. if("".equals(filename) || Path.SEPARATOR.equals(filename)) {
  598. parents[0] = filePattern;
  599. return parents;
  600. }
  601. String [] components = filename.split(Path.SEPARATOR);
  602. if(filePattern.isAbsolute()) {
  603. parents[0] = new Path(Path.SEPARATOR);
  604. level = 1;
  605. } else {
  606. parents[0] = new Path( "" );
  607. }
  608. Path[] results = globPathsLevel(parents, components, level, filter);
  609. Arrays.sort(results);
  610. return results;
  611. }
  612. private Path[] globPathsLevel(Path[] parents,
  613. String [] filePattern, int level, PathFilter filter) throws IOException {
  614. if (level == filePattern.length)
  615. return parents;
  616. GlobFilter fp = new GlobFilter(filePattern[level], filter);
  617. if( fp.hasPattern()) {
  618. parents = listPaths(parents, fp);
  619. } else {
  620. for(int i=0; i<parents.length; i++) {
  621. parents[i] = new Path(parents[i], filePattern[level]);
  622. }
  623. }
  624. return globPathsLevel(parents, filePattern, level+1, filter);
  625. }
  626. private static class GlobFilter implements PathFilter {
  627. private PathFilter userFilter = DEFAULT_FILTER;
  628. private Pattern regex;
  629. private boolean hasPattern = false;
  630. /** Default pattern character: Escape any special meaning. */
  631. private static final char PAT_ESCAPE = '\\';
  632. /** Default pattern character: Any single character. */
  633. private static final char PAT_ANY = '.';
  634. /** Default pattern character: Character set close. */
  635. private static final char PAT_SET_CLOSE = ']';
  636. GlobFilter() {
  637. }
  638. GlobFilter(String filePattern) throws IOException {
  639. setRegex(filePattern);
  640. }
  641. GlobFilter(String filePattern, PathFilter filter) throws IOException {
  642. userFilter = filter;
  643. setRegex(filePattern);
  644. }
  645. void setRegex(String filePattern) throws IOException {
  646. int len;
  647. int setOpen;
  648. boolean setRange;
  649. StringBuffer fileRegex = new StringBuffer();
  650. // Validate the pattern
  651. len = filePattern.length();
  652. if (len == 0)
  653. return;
  654. setOpen = 0;
  655. setRange = false;
  656. for (int i = 0; i < len; i++)
  657. {
  658. char pCh;
  659. // Examine a single pattern character
  660. pCh = filePattern.charAt(i);
  661. if( pCh == PAT_ESCAPE ) {
  662. fileRegex.append( pCh );
  663. i++;
  664. if (i >= len)
  665. error( "An escaped character does not present",
  666. filePattern, i);
  667. pCh = filePattern.charAt(i);
  668. } else if( pCh == '.' ) {
  669. fileRegex.append( PAT_ESCAPE );
  670. } else if( pCh == '*' ) {
  671. fileRegex.append( PAT_ANY );
  672. hasPattern = true;
  673. } else if( pCh == '?' ) {
  674. pCh = PAT_ANY ;
  675. hasPattern = true;
  676. } else if( pCh == '[' && setOpen == 0 ) {
  677. setOpen++;
  678. hasPattern = true;
  679. } else if( pCh == '^' && setOpen > 0) {
  680. } else if (pCh == '-' && setOpen > 0) {
  681. // Character set range
  682. setRange = true;
  683. } else if (pCh == PAT_SET_CLOSE && setRange) {
  684. // Incomplete character set range
  685. error("Incomplete character set range", filePattern, i);
  686. } else if (pCh == PAT_SET_CLOSE && setOpen > 0) {
  687. // End of a character set
  688. if (setOpen < 2)
  689. error("Unexpected end of set", filePattern, i);
  690. setOpen = 0;
  691. } else if (setOpen > 0) {
  692. // Normal character, or the end of a character set range
  693. setOpen++;
  694. setRange = false;
  695. }
  696. fileRegex.append( pCh );
  697. }
  698. // Check for a well-formed pattern
  699. if (setOpen > 0 || setRange)
  700. {
  701. // Incomplete character set or character range
  702. error("Expecting set closure character or end of range", filePattern, len);
  703. }
  704. regex = Pattern.compile(fileRegex.toString());
  705. }
  706. boolean hasPattern() {
  707. return hasPattern;
  708. }
  709. public boolean accept(Path path) {
  710. return regex.matcher(path.getName()).matches() && userFilter.accept(path);
  711. }
  712. private void error(String s, String pattern, int pos) throws IOException {
  713. throw new IOException("Illegal file pattern: "
  714. +s+" for glob "+pattern + " at " + pos);
  715. }
  716. }
  717. /**
  718. * Set the current working directory for the given file system.
  719. * All relative paths will be resolved relative to it.
  720. * @param new_dir the new working directory
  721. */
  722. public abstract void setWorkingDirectory(Path new_dir);
  723. /**
  724. * Get the current working directory for the given file system.
  725. * @return the directory pathname
  726. */
  727. public abstract Path getWorkingDirectory();
  728. /**
  729. * Make the given file and all non-existent parents into
  730. * directories. Has the semantics of Unix 'mkdir -p'.
  731. * Existence of the directory hierarchy is not an error.
  732. */
  733. public abstract boolean mkdirs(Path f) throws IOException;
  734. /**
  735. * Obtain a lock on the given Path.
  736. *
  737. * @deprecated FS does not support file locks anymore.
  738. */
  739. @Deprecated
  740. public abstract void lock(Path f, boolean shared) throws IOException;
  741. /**
  742. * Release the lock on the given Path.
  743. *
  744. * @deprecated FS does not support file locks anymore.
  745. */
  746. @Deprecated
  747. public abstract void release(Path f) throws IOException;
  748. /**
  749. * The src file is on the local disk. Add it to FS at
  750. * the given dst name; the source is kept intact afterwards.
  751. */
  752. public abstract void copyFromLocalFile(Path src, Path dst) throws IOException;
  753. /**
  754. * The src file is on the local disk. Add it to FS at
  755. * the given dst name, removing the source afterwards.
  756. */
  757. public abstract void moveFromLocalFile(Path src, Path dst) throws IOException;
  758. /**
  759. * The src file is under FS, and the dst is on the local disk.
  760. * Copy it from FS control to the local dst name.
  761. * If src and dst are directories, copy crc files as well.
  762. */
  763. public void copyToLocalFile(Path src, Path dst) throws IOException {
  764. copyToLocalFile(src, dst, true);
  765. }
  766. /**
  767. * The src file is under FS, and the dst is on the local disk.
  768. * Copy it from FS control to the local dst name.
  769. * If src and dst are directories, the copyCrc parameter
  770. * determines whether to copy CRC files.
  771. */
  772. public abstract void copyToLocalFile(Path src, Path dst, boolean copyCrc) throws IOException;
  773. /**
  774. * Returns a local File that the user can write output to. The caller
  775. * provides both the eventual FS target name and the local working
  776. * file. If the FS is local, we write directly into the target. If
  777. * the FS is remote, we write into the tmp local area.
  778. */
  779. public abstract Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException;
  780. /**
  781. * Called when we're all done writing to the target. A local FS will
  782. * do nothing, because we've written to exactly the right place. A remote
  783. * FS will copy the contents of tmpLocalFile to the correct target at
  784. * fsOutputFile.
  785. */
  786. public abstract void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) throws IOException;
  787. /**
  788. * No more filesystem operations are needed. Will
  789. * release any held locks.
  790. */
  791. public void close() throws IOException {
  792. URI uri = getUri();
  793. synchronized (FileSystem.class) {
  794. Map<String,FileSystem> authorityToFs = CACHE.get(uri.getScheme());
  795. if (authorityToFs != null) {
  796. authorityToFs.remove(uri.getAuthority());
  797. }
  798. }
  799. }
  800. /**
  801. * Report a checksum error to the file system.
  802. * @param f the file name containing the error
  803. * @param in the stream open on the file
  804. * @param inPos the position of the beginning of the bad data in the file
  805. * @param sums the stream open on the checksum file
  806. * @param sumsPos the position of the beginning of the bad data in the checksum file
  807. */
  808. public abstract void reportChecksumFailure(Path f,
  809. FSInputStream in, long inPos,
  810. FSInputStream sums, long sumsPos);
  811. /**
  812. * Get the block size for a particular file.
  813. * @param f the filename
  814. * @return the number of bytes in a block
  815. */
  816. public abstract long getBlockSize(Path f) throws IOException;
  817. /** Return the number of bytes that large input files should optimally
  818. * be split into to minimize i/o time. */
  819. public abstract long getDefaultBlockSize();
  820. /**
  821. * Get the default replication.
  822. */
  823. public abstract short getDefaultReplication();
  824. }