فهرست منبع

HADOOP-12009: Clarify FileSystem.listStatus() sorting order & fix FileSystemContractBaseTest:testListStatus. (J.Andreina via jghoman)

Jakob Homan 9 سال پیش
والد
کامیت
ab3197c204

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -497,6 +497,9 @@ Trunk (Unreleased)
 
     HADOOP-11762. Enable swift distcp to secure HDFS (Chen He via aw)
 
+    HADOOP-12009. Clarify FileSystem.listStatus() sorting order & fix
+    FileSystemContractBaseTest:testListStatus. (J.Andreina via jghoman)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)

+ 16 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java

@@ -1501,7 +1501,9 @@ public abstract class FileSystem extends Configured implements Closeable {
   /**
    * List the statuses of the files/directories in the given path if the path is
    * a directory.
-   * 
+   * <p>
+   * Does not guarantee to return the List of files/directories status in a
+   * sorted order.
    * @param f given path
    * @return the statuses of the files/directories in the given patch
    * @throws FileNotFoundException when the path does not exist;
@@ -1543,6 +1545,9 @@ public abstract class FileSystem extends Configured implements Closeable {
   /**
    * Filter files/directories in the given path using the user-supplied path
    * filter.
+   * <p>
+   * Does not guarantee to return the List of files/directories status in a
+   * sorted order.
    * 
    * @param f
    *          a path name
@@ -1563,6 +1568,9 @@ public abstract class FileSystem extends Configured implements Closeable {
   /**
    * Filter files/directories in the given list of paths using default
    * path filter.
+   * <p>
+   * Does not guarantee to return the List of files/directories status in a
+   * sorted order.
    * 
    * @param files
    *          a list of paths
@@ -1579,6 +1587,9 @@ public abstract class FileSystem extends Configured implements Closeable {
   /**
    * Filter files/directories in the given list of paths using user-supplied
    * path filter.
+   * <p>
+   * Does not guarantee to return the List of files/directories status in a
+   * sorted order.
    * 
    * @param files
    *          a list of paths
@@ -1739,6 +1750,8 @@ public abstract class FileSystem extends Configured implements Closeable {
    * while consuming the entries. Each file system implementation should
    * override this method and provide a more efficient implementation, if
    * possible. 
+   * Does not guarantee to return the iterator that traverses statuses
+   * of the files in a sorted order.
    *
    * @param p target path
    * @return remote iterator
@@ -1766,6 +1779,8 @@ public abstract class FileSystem extends Configured implements Closeable {
 
   /**
    * List the statuses and block locations of the files in the given path.
+   * Does not guarantee to return the iterator that traverses statuses
+   * of the files in a sorted order.
    * 
    * If the path is a directory, 
    *   if recursive is false, returns files in the directory;

+ 4 - 0
hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md

@@ -183,6 +183,10 @@ to the same path:
     forall fs in listStatus(Path) :
       fs == getFileStatus(fs.path)
 
+**Ordering of results**: there is no guarantee of ordering of the listed entries.
+While HDFS currently returns an alphanumerically sorted list, neither the Posix `readdir()`
+nor Java's `File.listFiles()` API calls define any ordering of returned values. Applications
+which require a uniform sort order on the results must perform the sorting themselves.
 
 ### Atomicity and Consistency
 

+ 8 - 3
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.fs;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.util.ArrayList;
 
 import junit.framework.TestCase;
 
@@ -224,9 +225,13 @@ public abstract class FileSystemContractBaseTest extends TestCase {
 
     paths = fs.listStatus(path("/test/hadoop"));
     assertEquals(3, paths.length);
-    assertEquals(path("/test/hadoop/a"), paths[0].getPath());
-    assertEquals(path("/test/hadoop/b"), paths[1].getPath());
-    assertEquals(path("/test/hadoop/c"), paths[2].getPath());
+    ArrayList<Path> list = new ArrayList<Path>();
+    for (FileStatus fileState : paths) {
+      list.add(fileState.getPath());
+    }
+    assertTrue(list.contains(path("/test/hadoop/a")));
+    assertTrue(list.contains(path("/test/hadoop/b")));
+    assertTrue(list.contains(path("/test/hadoop/c")));
 
     paths = fs.listStatus(path("/test/hadoop/a"));
     assertEquals(0, paths.length);