Browse Source

HADOOP-8164. Merging change 1300290 from trunk to 0.23

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23.2@1300293 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas 13 years ago
parent
commit
e1279f5f42

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -86,6 +86,9 @@ Release 0.23.2 - UNRELEASED
     HADOOP-8140. dfs -getmerge should process its arguments better (Daryn Sharp
     HADOOP-8140. dfs -getmerge should process its arguments better (Daryn Sharp
     via bobby)
     via bobby)
 
 
+    HADOOP-8164. Back slash as path separator is handled for Windows only.
+    (Daryn Sharp via suresh)
+
 Release 0.23.1 - 2012-02-17 
 Release 0.23.1 - 2012-02-17 
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 19 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java

@@ -1456,11 +1456,12 @@ public abstract class FileSystem extends Configured implements Closeable {
         results = listStatus(parentPaths, fp);
         results = listStatus(parentPaths, fp);
         hasGlob[0] = true;
         hasGlob[0] = true;
       } else { // last component does not have a pattern
       } else { // last component does not have a pattern
+        // remove the quoting of metachars in a non-regexp expansion
+        String name = unquotePathComponent(components[components.length - 1]);
         // get all the path names
         // get all the path names
         ArrayList<Path> filteredPaths = new ArrayList<Path>(parentPaths.length);
         ArrayList<Path> filteredPaths = new ArrayList<Path>(parentPaths.length);
         for (int i = 0; i < parentPaths.length; i++) {
         for (int i = 0; i < parentPaths.length; i++) {
-          parentPaths[i] = new Path(parentPaths[i],
-            components[components.length - 1]);
+          parentPaths[i] = new Path(parentPaths[i], name);
           if (fp.accept(parentPaths[i])) {
           if (fp.accept(parentPaths[i])) {
             filteredPaths.add(parentPaths[i]);
             filteredPaths.add(parentPaths[i]);
           }
           }
@@ -1503,14 +1504,28 @@ public abstract class FileSystem extends Configured implements Closeable {
     if (fp.hasPattern()) {
     if (fp.hasPattern()) {
       parents = FileUtil.stat2Paths(listStatus(parents, fp));
       parents = FileUtil.stat2Paths(listStatus(parents, fp));
       hasGlob[0] = true;
       hasGlob[0] = true;
-    } else {
+    } else { // the component does not have a pattern
+      // remove the quoting of metachars in a non-regexp expansion
+      String name = unquotePathComponent(filePattern[level]);
       for (int i = 0; i < parents.length; i++) {
       for (int i = 0; i < parents.length; i++) {
-        parents[i] = new Path(parents[i], filePattern[level]);
+        parents[i] = new Path(parents[i], name);
       }
       }
     }
     }
     return globPathsLevel(parents, filePattern, level + 1, hasGlob);
     return globPathsLevel(parents, filePattern, level + 1, hasGlob);
   }
   }
 
 
+  /**
+   * The glob filter builds a regexp per path component.  If the component
+   * does not contain a shell metachar, then it falls back to appending the
+   * raw string to the list of built up paths.  This raw path needs to have
+   * the quoting removed, i.e. convert all occurrences of "\X" to "X".
+   * @param name of the path component
+   * @return the unquoted path component
+   */
+  private String unquotePathComponent(String name) {
+    return name.replaceAll("\\\\(.)", "$1");
+  }
+  
   /**
   /**
    * List the statuses of the files/directories in the given path if the path is
    * List the statuses of the files/directories in the given path if the path is
    * a directory. 
    * a directory. 

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java

@@ -161,7 +161,9 @@ public class Path implements Comparable {
   private String normalizePath(String path) {
   private String normalizePath(String path) {
     // remove double slashes & backslashes
     // remove double slashes & backslashes
     path = StringUtils.replace(path, "//", "/");
     path = StringUtils.replace(path, "//", "/");
-    path = StringUtils.replace(path, "\\", "/");
+    if (Path.WINDOWS) {
+      path = StringUtils.replace(path, "\\", "/");
+    }
     
     
     // trim trailing slash from non-root path (ignoring windows drive)
     // trim trailing slash from non-root path (ignoring windows drive)
     int minLength = hasWindowsDrive(path, true) ? 4 : 1;
     int minLength = hasWindowsDrive(path, true) ? 4 : 1;

+ 58 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.fs;
 import java.io.IOException;
 import java.io.IOException;
 import java.net.URI;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URISyntaxException;
+import java.util.Arrays;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.AvroTestUtil;
 import org.apache.hadoop.io.AvroTestUtil;
@@ -264,4 +265,61 @@ public class TestPath extends TestCase {
        "{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.fs.Path\"}");
        "{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.fs.Path\"}");
   }
   }
 
 
+  public void testGlobEscapeStatus() throws Exception {
+    FileSystem lfs = FileSystem.getLocal(new Configuration());
+    Path testRoot = lfs.makeQualified(new Path(
+        System.getProperty("test.build.data","test/build/data"),
+        "testPathGlob"));
+    lfs.delete(testRoot, true);
+    lfs.mkdirs(testRoot);
+    assertTrue(lfs.isDirectory(testRoot));
+    lfs.setWorkingDirectory(testRoot);
+    
+    // create a couple dirs with file in them
+    Path paths[] = new Path[]{
+        new Path(testRoot, "*/f"),
+        new Path(testRoot, "d1/f"),
+        new Path(testRoot, "d2/f")
+    };
+    Arrays.sort(paths);
+    for (Path p : paths) {
+      lfs.create(p).close();
+      assertTrue(lfs.exists(p));
+    }
+
+    // try the non-globbed listStatus
+    FileStatus stats[] = lfs.listStatus(new Path(testRoot, "*"));
+    assertEquals(1, stats.length);
+    assertEquals(new Path(testRoot, "*/f"), stats[0].getPath());
+
+    // ensure globStatus with "*" finds all dir contents
+    stats = lfs.globStatus(new Path(testRoot, "*"));
+    Arrays.sort(stats);
+    assertEquals(paths.length, stats.length);
+    for (int i=0; i < paths.length; i++) {
+      assertEquals(paths[i].getParent(), stats[i].getPath());
+    }
+
+    // ensure that globStatus with an escaped "\*" only finds "*"
+    stats = lfs.globStatus(new Path(testRoot, "\\*"));
+    assertEquals(1, stats.length);
+    assertEquals(new Path(testRoot, "*"), stats[0].getPath());
+
+    // try to glob the inner file for all dirs
+    stats = lfs.globStatus(new Path(testRoot, "*/f"));
+    assertEquals(paths.length, stats.length);
+    for (int i=0; i < paths.length; i++) {
+      assertEquals(paths[i], stats[i].getPath());
+    }
+
+    // try to get the inner file for only the "*" dir
+    stats = lfs.globStatus(new Path(testRoot, "\\*/f"));
+    assertEquals(1, stats.length);
+    assertEquals(new Path(testRoot, "*/f"), stats[0].getPath());
+
+    // try to glob all the contents of the "*" dir
+    stats = lfs.globStatus(new Path(testRoot, "\\*/*"));
+    assertEquals(1, stats.length);
+    assertEquals(new Path(testRoot, "*/f"), stats[0].getPath());
+  }
 }
 }