|
@@ -83,6 +83,15 @@ class Globber {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Convert a path component that contains backslash ecape sequences to a
|
|
|
+ * literal string. This is necessary when you want to explicitly refer to a
|
|
|
+ * path that contains globber metacharacters.
|
|
|
+ */
|
|
|
+ private static String unescapePathComponent(String name) {
|
|
|
+ return name.replaceAll("\\\\(.)", "$1");
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Translate an absolute path into a list of path components.
|
|
|
* We merge double slashes into a single slash here.
|
|
@@ -166,37 +175,72 @@ class Globber {
|
|
|
new Path(scheme, authority, Path.SEPARATOR)));
|
|
|
}
|
|
|
|
|
|
- for (String component : components) {
|
|
|
+ for (int componentIdx = 0; componentIdx < components.size();
|
|
|
+ componentIdx++) {
|
|
|
ArrayList<FileStatus> newCandidates =
|
|
|
new ArrayList<FileStatus>(candidates.size());
|
|
|
- GlobFilter globFilter = new GlobFilter(component);
|
|
|
+ GlobFilter globFilter = new GlobFilter(components.get(componentIdx));
|
|
|
+ String component = unescapePathComponent(components.get(componentIdx));
|
|
|
if (globFilter.hasPattern()) {
|
|
|
sawWildcard = true;
|
|
|
}
|
|
|
if (candidates.isEmpty() && sawWildcard) {
|
|
|
+ // Optimization: if there are no more candidates left, stop examining
|
|
|
+ // the path components. We can only do this if we've already seen
|
|
|
+ // a wildcard component-- otherwise, we still need to visit all path
|
|
|
+ // components in case one of them is a wildcard.
|
|
|
break;
|
|
|
}
|
|
|
- for (FileStatus candidate : candidates) {
|
|
|
- FileStatus resolvedCandidate = candidate;
|
|
|
- if (candidate.isSymlink()) {
|
|
|
- // We have to resolve symlinks, because otherwise we don't know
|
|
|
- // whether they are directories.
|
|
|
- resolvedCandidate = getFileStatus(candidate.getPath());
|
|
|
+ if ((componentIdx < components.size() - 1) &&
|
|
|
+ (!globFilter.hasPattern())) {
|
|
|
+ // Optimization: if this is not the terminal path component, and we
|
|
|
+ // are not matching against a glob, assume that it exists. If it
|
|
|
+ // doesn't exist, we'll find out later when resolving a later glob
|
|
|
+ // or the terminal path component.
|
|
|
+ for (FileStatus candidate : candidates) {
|
|
|
+ candidate.setPath(new Path(candidate.getPath(), component));
|
|
|
}
|
|
|
- if (resolvedCandidate == null ||
|
|
|
- resolvedCandidate.isDirectory() == false) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- FileStatus[] children = listStatus(candidate.getPath());
|
|
|
- for (FileStatus child : children) {
|
|
|
- // Set the child path based on the parent path.
|
|
|
- // This keeps the symlinks in our path.
|
|
|
- child.setPath(new Path(candidate.getPath(),
|
|
|
- child.getPath().getName()));
|
|
|
- if (globFilter.accept(child.getPath())) {
|
|
|
- newCandidates.add(child);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ for (FileStatus candidate : candidates) {
|
|
|
+ if (globFilter.hasPattern()) {
|
|
|
+ FileStatus[] children = listStatus(candidate.getPath());
|
|
|
+ if (children.length == 1) {
|
|
|
+ // If we get back only one result, this could be either a listing
|
|
|
+ // of a directory with one entry, or it could reflect the fact
|
|
|
+ // that what we listed resolved to a file.
|
|
|
+ //
|
|
|
+ // Unfortunately, we can't just compare the returned paths to
|
|
|
+ // figure this out. Consider the case where you have /a/b, where
|
|
|
+ // b is a symlink to "..". In that case, listing /a/b will give
|
|
|
+ // back "/a/b" again. If we just went by returned pathname, we'd
|
|
|
+ // incorrectly conclude that /a/b was a file and should not match
|
|
|
+ // /a/*/*. So we use getFileStatus of the path we just listed to
|
|
|
+ // disambiguate.
|
|
|
+ if (!getFileStatus(candidate.getPath()).isDirectory()) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
}
|
|
|
- }
|
|
|
+ for (FileStatus child : children) {
|
|
|
+ // Set the child path based on the parent path.
|
|
|
+ child.setPath(new Path(candidate.getPath(),
|
|
|
+ child.getPath().getName()));
|
|
|
+ if (globFilter.accept(child.getPath())) {
|
|
|
+ newCandidates.add(child);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ // When dealing with non-glob components, use getFileStatus
|
|
|
+ // instead of listStatus. This is an optimization, but it also
|
|
|
+ // is necessary for correctness in HDFS, since there are some
|
|
|
+ // special HDFS directories like .reserved and .snapshot that are
|
|
|
+ // not visible to listStatus, but which do exist. (See HADOOP-9877)
|
|
|
+ FileStatus childStatus = getFileStatus(
|
|
|
+ new Path(candidate.getPath(), component));
|
|
|
+ if (childStatus != null) {
|
|
|
+ newCandidates.add(childStatus);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
candidates = newCandidates;
|
|
|
}
|