Ver Fonte

HADOOP-8989. hadoop fs -find feature (Jonathan Allen via aw)

Allen Wittenauer há 10 anos atrás
pai
commit
c060d60a40

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -23,6 +23,8 @@ Release 2.7.0 - UNRELEASED
     Mike Liddell, Chuan Liu, Lengning Liu, Ivan Mitic, Michael Rys,
     Alexander Stojanovich, Brian Swan, and Min Wei via cnauroth)
 
+    HADOOP-8989. hadoop fs -find feature (Jonathan Allen via aw)
+
   IMPROVEMENTS
 
     HADOOP-11483. HardLink.java should use the jdk7 createLink method (aajisaka)

+ 27 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java

@@ -64,6 +64,8 @@ abstract public class Command extends Configured {
   public PrintStream out = System.out;
   /** allows stderr to be captured if necessary */
   public PrintStream err = System.err;
+  /** allows the command factory to be used if necessary */
+  private CommandFactory commandFactory = null;
 
   /** Constructor */
   protected Command() {
@@ -120,6 +122,15 @@ abstract public class Command extends Configured {
     return exitCode;
   }
 
+  /** sets the command factory for later use */
+  public void setCommandFactory(CommandFactory factory) {
+    this.commandFactory = factory;
+  }
+  /** retrieves the command factory */
+  protected CommandFactory getCommandFactory() {
+    return this.commandFactory;
+  }
+
   /**
    * Invokes the command handler.  The default behavior is to process options,
    * expand arguments, and then process each argument.
@@ -304,7 +315,7 @@ abstract public class Command extends Configured {
     for (PathData item : items) {
       try {
         processPath(item);
-        if (recursive && item.stat.isDirectory()) {
+        if (recursive && isPathRecursable(item)) {
           recursePath(item);
         }
         postProcessPath(item);
@@ -314,6 +325,21 @@ abstract public class Command extends Configured {
     }
   }
 
+  /**
+   * Determines whether a {@link PathData} item is recursable. Default
+   * implementation is to recurse directories but can be overridden to recurse
+   * through symbolic links.
+   *
+   * @param item
+   *          a {@link PathData} object
+   * @return true if the item is recursable, false otherwise
+   * @throws IOException
+   *           if anything goes wrong in the user-implementation
+   */
+  protected boolean isPathRecursable(PathData item) throws IOException {
+    return item.stat.isDirectory();
+  }
+
   /**
    * Hook for commands to implement an operation to be applied on each
    * path for the command.  Note implementation of this method is optional

+ 1 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandFactory.java

@@ -124,6 +124,7 @@ public class CommandFactory extends Configured {
       if (cmdClass != null) {
         instance = ReflectionUtils.newInstance(cmdClass, conf);
         instance.setName(cmdName);
+        instance.setCommandFactory(this);
       }
     }
     return instance;

+ 2 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java

@@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FsShellPermissions;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.shell.find.Find;
 
 /**
  * Base class for all "hadoop fs" commands
@@ -48,6 +49,7 @@ abstract public class FsCommand extends Command {
     factory.registerCommands(Count.class);
     factory.registerCommands(Delete.class);
     factory.registerCommands(Display.class);
+    factory.registerCommands(Find.class);
     factory.registerCommands(FsShellPermissions.class);
     factory.registerCommands(FsUsage.class);
     factory.registerCommands(Ls.class);

+ 43 - 0
hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm

@@ -232,6 +232,49 @@ expunge
    Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html}
    HDFS Architecture Guide}} for more information on the Trash feature.
 
+find
+
+   Usage: <<<hdfs dfs -find <path> ... <expression> ... >>>
+
+   Finds all files that match the specified expression and applies selected
+   actions to them. If no <path> is specified then defaults to the current
+   working directory. If no expression is specified then defaults to -print.
+
+   The following primary expressions are recognised:
+
+     * -name pattern \
+       -iname pattern
+
+       Evaluates as true if the basename of the file matches the pattern using
+       standard file system globbing. If -iname is used then the match is case
+       insensitive.
+
+     * -print \
+       -print0
+
+       Always evaluates to true. Causes the current pathname to be written to
+       standard output. If the -print0 expression is used then an ASCII NULL
+       character is appended.
+
+   The following operators are recognised:
+
+     * expression -a expression \
+       expression -and expression \
+       expression expression
+
+       Logical AND operator for joining two expressions. Returns true if both
+       child expressions return true. Implied by the juxtaposition of two
+       expressions and so does not need to be explicitly specified. The second
+       expression will not be applied if the first fails.
+
+   Example:
+
+   <<<hdfs dfs -find / -name test -print>>>
+
+   Exit Code:
+
+     Returns 0 on success and -1 on error.
+
 get
 
    Usage: <<<hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst> >>>

+ 44 - 0
hadoop-common-project/hadoop-common/src/test/resources/testConf.xml

@@ -979,6 +979,50 @@
       </comparators>
     </test>
 
+    <test> <!-- TESTED -->
+      <description>help: help for find</description>
+      <test-commands>
+        <command>-help find</command>
+      </test-commands>
+      <cleanup-commands>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>-find &lt;path&gt; \.\.\. &lt;expression&gt; \.\.\. :
+  Finds all files that match the specified expression and
+  applies selected actions to them\. If no &lt;path&gt; is specified
+  then defaults to the current working directory\. If no
+  expression is specified then defaults to -print\.
+  
+  The following primary expressions are recognised:
+    -name pattern
+    -iname pattern
+      Evaluates as true if the basename of the file matches the
+      pattern using standard file system globbing\.
+      If -iname is used then the match is case insensitive\.
+  
+    -print
+    -print0
+      Always evaluates to true. Causes the current pathname to be
+      written to standard output followed by a newline. If the -print0
+      expression is used then an ASCII NULL character is appended rather
+      than a newline.
+  
+  The following operators are recognised:
+    expression -a expression
+    expression -and expression
+    expression expression
+      Logical AND operator for joining two expressions\. Returns
+      true if both child expressions return true\. Implied by the
+      juxtaposition of two expressions and so does not need to be
+      explicitly specified\. The second expression will not be
+      applied if the first fails\.
+</expected-output>	
+        </comparator>
+      </comparators>
+    </test>
+
     <test> <!-- TESTED -->
       <description>help: help for help</description>
       <test-commands>

+ 223 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml

@@ -16841,5 +16841,228 @@
         </comparator>
       </comparators>
     </test>
+
+    <!-- Tests for find -->
+    <test> <!-- TESTED -->
+      <description>find: default expression</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /donotfind</command>
+        <command>-fs NAMENODE -mkdir donotfind</command>
+        <command>-fs NAMENODE -mkdir /findtest</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
+        <command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
+        <command>-fs NAMENODE -mkdir /findtest/item3</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
+        <command>-fs NAMENODE -find /findtest</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /donotfind</command>
+        <command>-fs NAMENODE -rm -r donotfind</command>
+        <command>-fs NAMENODE -rm -r /findtest</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>^/findtest
+/findtest/item1
+/findtest/item1/item1a
+/findtest/item1/item1a/item1aa
+/findtest/item1/item1b
+/findtest/item2
+/findtest/item3
+/findtest/item4
+/findtest/item4/item4a
+/findtest/item4/item4b
+/findtest/item5
+$</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    <test> <!-- TESTED -->
+      <description>find: -print </description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /donotfind</command>
+        <command>-fs NAMENODE -mkdir donotfind</command>
+        <command>-fs NAMENODE -mkdir /findtest</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
+        <command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
+        <command>-fs NAMENODE -mkdir /findtest/item3</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
+        <command>-fs NAMENODE -find /findtest -print</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /donotfind</command>
+        <command>-fs NAMENODE -rm -r donotfind</command>
+        <command>-fs NAMENODE -rm -r /findtest</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>^/findtest
+/findtest/item1
+/findtest/item1/item1a
+/findtest/item1/item1a/item1aa
+/findtest/item1/item1b
+/findtest/item2
+/findtest/item3
+/findtest/item4
+/findtest/item4/item4a
+/findtest/item4/item4b
+/findtest/item5
+$</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    <test> <!-- TESTED -->
+      <description>find: -print (relative path) </description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /donotfind</command>
+        <command>-fs NAMENODE -mkdir -p donotfind</command>
+        <command>-fs NAMENODE -mkdir -p findtest</command>
+        <command>-fs NAMENODE -mkdir -p findtest/item1</command>
+        <command>-fs NAMENODE -mkdir -p findtest/item1/item1a</command>
+        <command>-fs NAMENODE -touchz findtest/item1/item1a/item1aa</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item1/item1b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item2</command>
+        <command>-fs NAMENODE -mkdir -p findtest/item3</command>
+        <command>-fs NAMENODE -mkdir -p findtest/item4</command>
+        <command>-fs NAMENODE -mkdir -p findtest/item4/item4a</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data120bytes findtest/item4/item4b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data1k findtest/item5</command>
+        <command>-fs NAMENODE -find findtest -print</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /donotfind</command>
+        <command>-fs NAMENODE -rm -r donotfind</command>
+        <command>-fs NAMENODE -rm -r findtest</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>^findtest
+findtest/item1
+findtest/item1/item1a
+findtest/item1/item1a/item1aa
+findtest/item1/item1b
+findtest/item2
+findtest/item3
+findtest/item4
+findtest/item4/item4a
+findtest/item4/item4b
+findtest/item5
+$</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    <test> <!-- TESTED -->
+      <description>find: -print (cwd) </description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /donotfind</command>
+        <command>-fs NAMENODE -mkdir findtest</command>
+        <command>-fs NAMENODE -mkdir findtest/item1</command>
+        <command>-fs NAMENODE -mkdir findtest/item1/item1a</command>
+        <command>-fs NAMENODE -touchz findtest/item1/item1a/item1aa</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item1/item1b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item2</command>
+        <command>-fs NAMENODE -mkdir findtest/item3</command>
+        <command>-fs NAMENODE -mkdir findtest/item4</command>
+        <command>-fs NAMENODE -mkdir findtest/item4/item4a</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data120bytes findtest/item4/item4b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data1k findtest/item5</command>
+        <command>-fs NAMENODE -find -print</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r findtest</command>
+        <command>-fs NAMENODE -rm -r /donotfind</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>^.
+findtest
+findtest/item1
+findtest/item1/item1a
+findtest/item1/item1a/item1aa
+findtest/item1/item1b
+findtest/item2
+findtest/item3
+findtest/item4
+findtest/item4/item4a
+findtest/item4/item4b
+findtest/item5
+$</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    <test> <!-- TESTED -->
+      <description>find: -name </description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /findtest</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
+        <command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
+        <command>-fs NAMENODE -mkdir /findtest/item3</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
+        <command>-fs NAMENODE -find /findtest -name item*a</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /findtest</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>^/findtest/item1/item1a
+/findtest/item1/item1a/item1aa
+/findtest/item4/item4a
+$</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    <test> <!-- TESTED -->
+      <description>find: -iname </description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /findtest</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1</command>
+        <command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
+        <command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
+        <command>-fs NAMENODE -mkdir /findtest/item3</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4</command>
+        <command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
+        <command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
+        <command>-fs NAMENODE -find /findtest -iname ITEM*a</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /findtest</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpAcrossOutputComparator</type>
+          <expected-output>^/findtest/item1/item1a
+/findtest/item1/item1a/item1aa
+/findtest/item4/item4a
+$</expected-output>
+        </comparator>
+      </comparators>
+    </test>
   </tests>
 </configuration>