浏览代码

HADOOP-11483. HardLink.java should use the jdk7 createLink method

(cherry picked from commit db51548f706ccd2d0200745ab89e27610c6d10bc)
Akira Ajisaka 10 年之前
父节点
当前提交
0f33d45948

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -25,6 +25,8 @@ Release 2.7.0 - UNRELEASED
 
   IMPROVEMENTS
 
+    HADOOP-11483. HardLink.java should use the jdk7 createLink method (aajisaka)
+
     HADOOP-11156. DelegateToFileSystem should implement
     getFsStatus(final Path f). (Zhihai Xu via wang)
 

+ 18 - 295
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java

@@ -23,13 +23,16 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.Arrays;
 
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
 
+import com.google.common.annotations.VisibleForTesting;
+
+import static java.nio.file.Files.createLink;
+
 /**
  * Class for creating hardlinks.
  * Supports Unix/Linux, Windows via winutils , and Mac OS X.
@@ -75,114 +78,30 @@ public class HardLink {
   
   /**
    * This abstract class bridges the OS-dependent implementations of the 
-   * needed functionality for creating hardlinks and querying link counts.
+   * needed functionality for querying link counts.
    * The particular implementation class is chosen during 
    * static initialization phase of the HardLink class.
-   * The "getter" methods construct shell command strings for various purposes.
+   * The "getter" methods construct shell command strings.
    */
   private static abstract class HardLinkCommandGetter {
-
-    /**
-     * Get the command string needed to hardlink a bunch of files from
-     * a single source directory into a target directory.  The source directory
-     * is not specified here, but the command will be executed using the source
-     * directory as the "current working directory" of the shell invocation.
-     * 
-     * @param fileBaseNames - array of path-less file names, relative
-     *            to the source directory
-     * @param linkDir - target directory where the hardlinks will be put
-     * @return - an array of Strings suitable for use as a single shell command
-     * @throws IOException - if any of the file or path names misbehave
-     */
-    abstract String[] linkMult(String[] fileBaseNames, File linkDir) 
-                          throws IOException;
-    
-    /**
-     * Get the command string needed to hardlink a single file
-     */
-    abstract String[] linkOne(File file, File linkName) throws IOException;
-    
     /**
      * Get the command string to query the hardlink count of a file
      */
     abstract String[] linkCount(File file) throws IOException;
-    
-    /**
-     * Calculate the total string length of the shell command
-     * resulting from execution of linkMult, plus the length of the
-     * source directory name (which will also be provided to the shell)
-     * 
-     * @param fileDir - source directory, parent of fileBaseNames
-     * @param fileBaseNames - array of path-less file names, relative
-     *            to the source directory
-     * @param linkDir - target directory where the hardlinks will be put
-     * @return - total data length (must not exceed maxAllowedCmdArgLength)
-     * @throws IOException
-     */
-    abstract int getLinkMultArgLength(
-                     File fileDir, String[] fileBaseNames, File linkDir) 
-                     throws IOException;
-    
-    /**
-     * Get the maximum allowed string length of a shell command on this OS,
-     * which is just the documented minimum guaranteed supported command
-     * length - aprx. 32KB for Unix, and 8KB for Windows.
-     */
-    abstract int getMaxAllowedCmdArgLength(); 
   }
   
   /**
    * Implementation of HardLinkCommandGetter class for Unix
    */
-  static class HardLinkCGUnix extends HardLinkCommandGetter {
-    private static String[] hardLinkCommand = {"ln", null, null};
-    private static String[] hardLinkMultPrefix = {"ln"};
-    private static String[] hardLinkMultSuffix = {null};
+  private static class HardLinkCGUnix extends HardLinkCommandGetter {
     private static String[] getLinkCountCommand = {"stat","-c%h", null};
-    //Unix guarantees at least 32K bytes cmd length.
-    //Subtract another 64b to allow for Java 'exec' overhead
-    private static final int maxAllowedCmdArgLength = 32*1024 - 65;
-    
     private static synchronized 
     void setLinkCountCmdTemplate(String[] template) {
       //May update this for specific unix variants, 
       //after static initialization phase
       getLinkCountCommand = template;
     }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
-     */
-    @Override
-    String[] linkOne(File file, File linkName) 
-    throws IOException {
-      String[] buf = new String[hardLinkCommand.length];
-      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
-      //unix wants argument order: "ln <existing> <new>"
-      buf[1] = FileUtil.makeShellPath(file, true); 
-      buf[2] = FileUtil.makeShellPath(linkName, true);
-      return buf;
-    }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
-     */
-    @Override
-    String[] linkMult(String[] fileBaseNames, File linkDir) 
-    throws IOException {
-      String[] buf = new String[fileBaseNames.length 
-                                + hardLinkMultPrefix.length 
-                                + hardLinkMultSuffix.length];
-      int mark=0;
-      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
-                       hardLinkMultPrefix.length);
-      mark += hardLinkMultPrefix.length;
-      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
-      mark += fileBaseNames.length;
-      buf[mark] = FileUtil.makeShellPath(linkDir, true);
-      return buf;
-    }
-    
+
     /*
      * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
      */
@@ -195,169 +114,30 @@ public class HardLink {
       buf[getLinkCountCommand.length - 1] = FileUtil.makeShellPath(file, true);
       return buf;
     }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
-     */
-    @Override
-    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
-    throws IOException{
-      int sum = 0;
-      for (String x : fileBaseNames) {
-        // add 1 to account for terminal null or delimiter space
-        sum += 1 + ((x == null) ? 0 : x.length());
-      }
-      sum += 2 + FileUtil.makeShellPath(fileDir, true).length()
-             + FileUtil.makeShellPath(linkDir, true).length();
-      //add the fixed overhead of the hardLinkMult prefix and suffix
-      sum += 3; //length("ln") + 1
-      return sum;
-    }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
-     */
-    @Override
-    int getMaxAllowedCmdArgLength() {
-      return maxAllowedCmdArgLength;
-    }
   }
   
-  
   /**
    * Implementation of HardLinkCommandGetter class for Windows
    */
+  @VisibleForTesting
   static class HardLinkCGWin extends HardLinkCommandGetter {
-    //The Windows command getter impl class and its member fields are
-    //package-private ("default") access instead of "private" to assist 
-    //unit testing (sort of) on non-Win servers
 
-    static String CMD_EXE = "cmd.exe";
-    static String[] hardLinkCommand = {
-                        Shell.WINUTILS,"hardlink","create", null, null};
-    static String[] hardLinkMultPrefix = {
-        CMD_EXE, "/q", "/c", "for", "%f", "in", "("};
-    static String   hardLinkMultDir = "\\%f";
-    static String[] hardLinkMultSuffix = {
-        ")", "do", Shell.WINUTILS, "hardlink", "create", null,
-        "%f"};
     static String[] getLinkCountCommand = {
         Shell.WINUTILS, "hardlink", "stat", null};
-    //Windows guarantees only 8K - 1 bytes cmd length.
-    //Subtract another 64b to allow for Java 'exec' overhead
-    static final int maxAllowedCmdArgLength = 8*1024 - 65;
 
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkOne(java.io.File, java.io.File)
-     */
-    @Override
-    String[] linkOne(File file, File linkName) 
-    throws IOException {
-      String[] buf = new String[hardLinkCommand.length];
-      System.arraycopy(hardLinkCommand, 0, buf, 0, hardLinkCommand.length);
-      //windows wants argument order: "create <new> <existing>"
-      buf[4] = file.getCanonicalPath(); 
-      buf[3] = linkName.getCanonicalPath();
-      return buf;
-    }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkMult(java.lang.String[], java.io.File)
-     */
-    @Override
-    String[] linkMult(String[] fileBaseNames, File linkDir) 
-    throws IOException {
-      String[] buf = new String[fileBaseNames.length 
-                                + hardLinkMultPrefix.length 
-                                + hardLinkMultSuffix.length];
-      String td = linkDir.getCanonicalPath() + hardLinkMultDir;
-      int mark=0;
-      System.arraycopy(hardLinkMultPrefix, 0, buf, mark, 
-                       hardLinkMultPrefix.length);
-      mark += hardLinkMultPrefix.length;
-      System.arraycopy(fileBaseNames, 0, buf, mark, fileBaseNames.length);
-      mark += fileBaseNames.length;
-      System.arraycopy(hardLinkMultSuffix, 0, buf, mark, 
-                       hardLinkMultSuffix.length);
-      mark += hardLinkMultSuffix.length;
-      buf[mark - 2] = td;
-      return buf;
-    }
-    
     /*
      * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#linkCount(java.io.File)
      */
     @Override
-    String[] linkCount(File file) 
-    throws IOException {
+    String[] linkCount(File file) throws IOException {
       String[] buf = new String[getLinkCountCommand.length];
       System.arraycopy(getLinkCountCommand, 0, buf, 0, 
                        getLinkCountCommand.length);
       buf[getLinkCountCommand.length - 1] = file.getCanonicalPath();
       return buf;
     }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getLinkMultArgLength(java.io.File, java.lang.String[], java.io.File)
-     */
-    @Override
-    int getLinkMultArgLength(File fileDir, String[] fileBaseNames, File linkDir) 
-    throws IOException {
-      int sum = 0;
-      for (String x : fileBaseNames) {
-        // add 1 to account for terminal null or delimiter space
-        sum += 1 + ((x == null) ? 0 : x.length());
-      }
-      sum += 2 + fileDir.getCanonicalPath().length() +
-               linkDir.getCanonicalPath().length();
-      //add the fixed overhead of the hardLinkMult command 
-      //(prefix, suffix, and Dir suffix)
-      sum += (CMD_EXE + " /q /c for %f in ( ) do "
-              + Shell.WINUTILS + " hardlink create \\%f %f").length();
-      return sum;
-    }
-    
-    /*
-     * @see org.apache.hadoop.fs.HardLink.HardLinkCommandGetter#getMaxAllowedCmdArgLength()
-     */
-    @Override
-    int getMaxAllowedCmdArgLength() {
-      return maxAllowedCmdArgLength;
-    }
   }
-  
-  
-  /**
-   * Calculate the nominal length of all contributors to the total 
-   * commandstring length, including fixed overhead of the OS-dependent 
-   * command.  It's protected rather than private, to assist unit testing,
-   * but real clients are not expected to need it -- see the way 
-   * createHardLinkMult() uses it internally so the user doesn't need to worry
-   * about it.
-   * 
-   * @param fileDir - source directory, parent of fileBaseNames
-   * @param fileBaseNames - array of path-less file names, relative
-   *            to the source directory
-   * @param linkDir - target directory where the hardlinks will be put
-   * @return - total data length (must not exceed maxAllowedCmdArgLength)
-   * @throws IOException
-   */
-  protected static int getLinkMultArgLength(
-          File fileDir, String[] fileBaseNames, File linkDir) 
-  throws IOException {
-    return getHardLinkCommand.getLinkMultArgLength(fileDir, 
-          fileBaseNames, linkDir);
-  }
-  
-  /**
-   * Return this private value for use by unit tests.
-   * Shell commands are not allowed to have a total string length
-   * exceeding this size.
-   */
-  protected static int getMaxAllowedCmdArgLength() {
-    return getHardLinkCommand.getMaxAllowedCmdArgLength();
-  }
-  
+
   /*
    * ****************************************************
    * Complexity is above.  User-visible functionality is below
@@ -370,7 +150,7 @@ public class HardLink {
    * @param linkName - desired target link file
    */
   public static void createHardLink(File file, File linkName) 
-  throws IOException {
+      throws IOException {
     if (file == null) {
       throw new IOException(
           "invalid arguments to createHardLink: source file is null");
@@ -379,17 +159,7 @@ public class HardLink {
       throw new IOException(
           "invalid arguments to createHardLink: link name is null");
     }
-	  // construct and execute shell command
-    String[] hardLinkCommand = getHardLinkCommand.linkOne(file, linkName);
-    ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand);
-    try {
-      shexec.execute();
-    } catch (ExitCodeException e) {
-      throw new IOException("Failed to execute command " +
-          Arrays.toString(hardLinkCommand) +
-          "; command output: \"" + shexec.getOutput() + "\"" +
-          "; WrappedException: \"" + e.getMessage() + "\"");
-    }
+    createLink(linkName.toPath(), file.toPath());
   }
 
   /**
@@ -398,30 +168,10 @@ public class HardLink {
    * @param parentDir - directory containing source files
    * @param fileBaseNames - list of path-less file names, as returned by 
    *                        parentDir.list()
-   * @param linkDir - where the hardlinks should be put.  It must already exist.
-   * 
-   * If the list of files is too long (overflows maxAllowedCmdArgLength),
-   * we will automatically split it into multiple invocations of the
-   * underlying method.
+   * @param linkDir - where the hardlinks should be put. It must already exist.
    */
-  public static void createHardLinkMult(File parentDir, String[] fileBaseNames, 
+  public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
       File linkDir) throws IOException {
-    //This is the public method all non-test clients are expected to use.
-    //Normal case - allow up to maxAllowedCmdArgLength characters in the cmd
-    createHardLinkMult(parentDir, fileBaseNames, linkDir, 
-                       getHardLinkCommand.getMaxAllowedCmdArgLength());
-  }
-
-  /*
-   * Implements {@link createHardLinkMult} with added variable  "maxLength",
-   * to ease unit testing of the auto-splitting feature for long lists.
-   * Likewise why it returns "callCount", the number of sub-arrays that
-   * the file list had to be split into.
-   * Non-test clients are expected to call the public method instead.
-   */
-  protected static int createHardLinkMult(File parentDir, 
-      String[] fileBaseNames, File linkDir, int maxLength) 
-  throws IOException {
     if (parentDir == null) {
       throw new IOException(
           "invalid arguments to createHardLinkMult: parent directory is null");
@@ -435,40 +185,13 @@ public class HardLink {
           "invalid arguments to createHardLinkMult: "
           + "filename list can be empty but not null");
     }
-    if (fileBaseNames.length == 0) {
-      //the OS cmds can't handle empty list of filenames, 
-      //but it's legal, so just return.
-      return 0; 
-    }
     if (!linkDir.exists()) {
       throw new FileNotFoundException(linkDir + " not found.");
     }
-
-    //if the list is too long, split into multiple invocations
-    int callCount = 0;
-    if (getLinkMultArgLength(parentDir, fileBaseNames, linkDir) > maxLength
-          && fileBaseNames.length > 1) {
-      String[] list1 = Arrays.copyOf(fileBaseNames, fileBaseNames.length/2);
-      callCount += createHardLinkMult(parentDir, list1, linkDir, maxLength);
-      String[] list2 = Arrays.copyOfRange(fileBaseNames, fileBaseNames.length/2,
-          fileBaseNames.length);
-      callCount += createHardLinkMult(parentDir, list2, linkDir, maxLength);  
-      return callCount;
-    } else {
-      callCount = 1;
-    }
-    
-    // construct and execute shell command
-    String[] hardLinkCommand = getHardLinkCommand.linkMult(fileBaseNames, 
-        linkDir);
-    ShellCommandExecutor shexec = new ShellCommandExecutor(hardLinkCommand,
-      parentDir, null, 0L);
-    try {
-      shexec.execute();
-    } catch (ExitCodeException e) {
-      throw new IOException(shexec.getOutput() + e.getMessage());
+    for (String name : fileBaseNames) {
+      createLink(linkDir.toPath().resolve(name),
+                 parentDir.toPath().resolve(name));
     }
-    return callCount;
   }
 
    /**

+ 3 - 88
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHardLink.java

@@ -295,91 +295,11 @@ public class TestHardLink {
     String[] emptyList = {};
     
     //test the case of empty file list
-    int callCount = createHardLinkMult(src, emptyList, tgt_mult, 
-        getMaxAllowedCmdArgLength());
-    //check no exec calls were made
-    assertEquals(0, callCount);
+    createHardLinkMult(src, emptyList, tgt_mult);
     //check nothing changed in the directory tree
     validateSetup();
   }
-  
-  /**
-   * Test createHardLinkMult(), again, this time with the "too long list" 
-   * case where the total size of the command line arguments exceed the 
-   * allowed maximum.  In this case, the list should be automatically 
-   * broken up into chunks, each chunk no larger than the max allowed.
-   * 
-   * We use an extended version of the method call, specifying the
-   * size limit explicitly, to simulate the "too long" list with a 
-   * relatively short list.
-   */
-  @Test
-  public void testCreateHardLinkMultOversizeAndEmpty() throws IOException {
-    
-    // prep long filenames - each name takes 10 chars in the arg list
-    // (9 actual chars plus terminal null or delimeter blank)
-    String name1 = "x11111111";
-    String name2 = "x22222222";
-    String name3 = "x33333333";
-    File x1_long = new File(src, name1);
-    File x2_long = new File(src, name2);
-    File x3_long = new File(src, name3);
-    //set up source files with long file names
-    x1.renameTo(x1_long);
-    x2.renameTo(x2_long);
-    x3.renameTo(x3_long);
-    //validate setup
-    assertTrue(x1_long.exists());
-    assertTrue(x2_long.exists());
-    assertTrue(x3_long.exists());
-    assertFalse(x1.exists());
-    assertFalse(x2.exists());
-    assertFalse(x3.exists());
-    
-    //prep appropriate length information to construct test case for
-    //oversize filename list
-    int callCount;
-    String[] emptyList = {};
-    String[] fileNames = src.list();
-    //get fixed size of arg list without any filenames
-    int overhead = getLinkMultArgLength(src, emptyList, tgt_mult);
-    //select a maxLength that is slightly too short to hold 3 filenames
-    int maxLength = overhead + (int)(2.5 * (float)(1 + name1.length())); 
-    
-    //now test list of three filenames when there is room for only 2.5
-    callCount = createHardLinkMult(src, fileNames, tgt_mult, maxLength);
-    //check the request was completed in exactly two "chunks"
-    assertEquals(2, callCount);
-    String[] tgt_multNames = tgt_mult.list();
-    //sort directory listings before comparsion
-    Arrays.sort(fileNames);
-    Arrays.sort(tgt_multNames);
-    //and check the results were as expected in the dir tree
-    assertArrayEquals(fileNames, tgt_multNames);
-    
-    //Test the case where maxlength is too small even for one filename.
-    //It should go ahead and try the single files.
-    
-    //Clear the test dir tree
-    FileUtil.fullyDelete(tgt_mult);
-    assertFalse(tgt_mult.exists());
-    tgt_mult.mkdirs();
-    assertTrue(tgt_mult.exists() && tgt_mult.list().length == 0);
-    //set a limit size much smaller than a single filename
-    maxLength = overhead + (int)(0.5 * (float)(1 + name1.length()));
-    //attempt the method call
-    callCount = createHardLinkMult(src, fileNames, tgt_mult, 
-        maxLength);
-    //should go ahead with each of the three single file names
-    assertEquals(3, callCount);
-    tgt_multNames = tgt_mult.list();
-    //sort directory listings before comparsion
-    Arrays.sort(fileNames);
-    Arrays.sort(tgt_multNames);
-    //and check the results were as expected in the dir tree
-    assertArrayEquals(fileNames, tgt_multNames);
-  }
-  
+
   /*
    * Assume that this test won't usually be run on a Windows box.
    * This test case allows testing of the correct syntax of the Windows
@@ -392,18 +312,13 @@ public class TestHardLink {
    */
   @Test
   public void testWindowsSyntax() {
-    class win extends HardLinkCGWin {};
+    class win extends HardLinkCGWin {}
 
     //basic checks on array lengths
-    assertEquals(5, win.hardLinkCommand.length); 
-    assertEquals(7, win.hardLinkMultPrefix.length);
-    assertEquals(7, win.hardLinkMultSuffix.length);
     assertEquals(4, win.getLinkCountCommand.length);
 
-    assertTrue(win.hardLinkMultPrefix[4].equals("%f"));
     //make sure "%f" was not munged
     assertEquals(2, ("%f").length()); 
-    assertTrue(win.hardLinkMultDir.equals("\\%f"));
     //make sure "\\%f" was munged correctly
     assertEquals(3, ("\\%f").length()); 
     assertTrue(win.getLinkCountCommand[1].equals("hardlink"));