Browse Source

HADOOP-10798. globStatus() should always return a sorted list of files (cmccabe)

Colin Patrick Mccabe 9 years ago
parent
commit
68e588cbee

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -905,6 +905,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12154. FileSystem#getUsed() returns the file length only from root '/'
     (J.Andreina via vinayakumarb)
 
+    HADOOP-10798. globStatus() should always return a sorted list of files
+    (cmccabe)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 10 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.fs;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.commons.logging.LogFactory;
@@ -285,6 +286,14 @@ class Globber {
         (flattenedPatterns.size() <= 1)) {
       return null;
     }
-    return results.toArray(new FileStatus[0]);
+    /*
+     * In general, the results list will already be sorted, since listStatus
+     * returns results in sorted order for many Hadoop filesystems.  However,
+     * not all Hadoop filesystems have this property.  So we sort here in order
+     * to get consistent results.  See HADOOP-10798 for details.
+     */
+    FileStatus ret[] = results.toArray(new FileStatus[0]);
+    Arrays.sort(ret);
+    return ret;
   }
 }

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java

@@ -21,9 +21,11 @@ import static org.junit.Assert.*;
 
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
 import java.util.UUID;
 import java.util.regex.Pattern;
 
+import com.google.common.collect.Ordering;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -1284,4 +1286,27 @@ public class TestGlobPaths {
   public void testNonTerminalGlobsOnFC() throws Exception {
     testOnFileContext(new TestNonTerminalGlobs(true));
   }
+
+  // HADOOP-10798: globStatus() results must be sorted on the local filesystem.
+  @Test
+  public void testLocalFilesystem() throws Exception {
+    FileSystem fs = FileSystem.getLocal(new Configuration());
+    Path tmp = new Path(System.getProperty("java.io.tmpdir"));
+    Path base = new Path(tmp, UUID.randomUUID().toString());
+    Assert.assertTrue(fs.mkdirs(base));
+    fs.deleteOnExit(base);  // register cleanup before later steps can fail
+    String[] names = {"e", "c", "a", "d", "b"};  // created out of order
+    for (String name : names) {
+      Assert.assertTrue(fs.mkdirs(new Path(base, name)));
+    }
+    FileStatus[] status = fs.globStatus(new Path(base, "*"));
+    // An empty result is trivially ordered, so check the match count too.
+    Assert.assertEquals(names.length, status.length);
+    ArrayList<String> list = new ArrayList<String>();
+    for (FileStatus f : status) {
+      list.add(f.getPath().toString());
+    }
+    Assert.assertTrue(list.toString(), Ordering.natural().isOrdered(list));
+  }
 }
+