
HADOOP-928. Make checksums optional per FileSystem. Contributed by Hairong.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@512499 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 18 years ago
commit a7620726e9
33 changed files with 1966 additions and 1424 deletions
  1. CHANGES.txt (+3 -0)
  2. src/java/org/apache/hadoop/dfs/DFSClient.java (+41 -7)
  3. src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (+104 -49)
  4. src/java/org/apache/hadoop/dfs/NamenodeFsck.java (+3 -3)
  5. src/java/org/apache/hadoop/dfs/TransferFsImage.java (+0 -1)
  6. src/java/org/apache/hadoop/filecache/DistributedCache.java (+14 -3)
  7. src/java/org/apache/hadoop/fs/ChecksumFileSystem.java (+629 -0)
  8. src/java/org/apache/hadoop/fs/FSDataInputStream.java (+16 -222)
  9. src/java/org/apache/hadoop/fs/FSDataOutputStream.java (+7 -114)
  10. src/java/org/apache/hadoop/fs/FSOutputStream.java (+0 -32)
  11. src/java/org/apache/hadoop/fs/FileSystem.java (+157 -250)
  12. src/java/org/apache/hadoop/fs/FileUtil.java (+26 -45)
  13. src/java/org/apache/hadoop/fs/FilterFileSystem.java (+302 -0)
  14. src/java/org/apache/hadoop/fs/FsShell.java (+3 -2)
  15. src/java/org/apache/hadoop/fs/InMemoryFileSystem.java (+123 -68)
  16. src/java/org/apache/hadoop/fs/LocalFileSystem.java (+25 -354)
  17. src/java/org/apache/hadoop/fs/RawLocalFileSystem.java (+376 -0)
  18. src/java/org/apache/hadoop/fs/s3/S3FileSystem.java (+22 -39)
  19. src/java/org/apache/hadoop/fs/s3/S3OutputStream.java (+1 -3)
  20. src/java/org/apache/hadoop/mapred/MapOutputLocation.java (+2 -1)
  21. src/java/org/apache/hadoop/mapred/PhasedFileSystem.java (+34 -167)
  22. src/java/org/apache/hadoop/util/CopyFiles.java (+4 -6)
  23. src/test/org/apache/hadoop/dfs/ClusterTestDFS.java (+2 -2)
  24. src/test/org/apache/hadoop/dfs/ClusterTestDFSNamespaceLogging.java (+2 -2)
  25. src/test/org/apache/hadoop/dfs/NNBench.java (+9 -9)
  26. src/test/org/apache/hadoop/dfs/TestCheckpoint.java (+4 -5)
  27. src/test/org/apache/hadoop/dfs/TestDFSShell.java (+10 -7)
  28. src/test/org/apache/hadoop/dfs/TestDecommission.java (+8 -6)
  29. src/test/org/apache/hadoop/dfs/TestReplication.java (+4 -3)
  30. src/test/org/apache/hadoop/dfs/TestSeekBug.java (+7 -5)
  31. src/test/org/apache/hadoop/dfs/TestSmallBlock.java (+6 -5)
  32. src/test/org/apache/hadoop/fs/TestLocalFileSystem.java (+1 -1)
  33. src/test/org/apache/hadoop/fs/s3/S3FileSystemBaseTest.java (+21 -13)
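
The heart of the change is the new ChecksumFileSystem/FilterFileSystem pair: checksum generation and verification move out of FSDataInputStream/FSDataOutputStream into a wrapper filesystem, so an implementation opts out of checksums simply by not being wrapped. A minimal usage sketch of the new layering (hypothetical class and path; only the ChecksumFileSystem calls below come from this commit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.ChecksumFileSystem;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ChecksumLayerSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);      // often a ChecksumFileSystem now
        Path file = new Path("/tmp/example.txt");  // hypothetical path

        if (fs instanceof ChecksumFileSystem) {
          ChecksumFileSystem cfs = (ChecksumFileSystem) fs;
          System.out.println("crc file:  " + cfs.getChecksumFile(file));
          System.out.println("bytes/sum: " + cfs.getBytesPerSum());
          // Reads through cfs verify checksums; the raw layer skips them.
          FileSystem raw = cfs.getRawFileSystem();
          raw.open(file, 4096).close();            // unchecked read path
        }
      }
    }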

+ 3 - 0
CHANGES.txt

@@ -150,6 +150,9 @@ Trunk (unreleased changes)
 44. HADOOP-1042.  Improve the handling of failed map output fetches.
     (Devaraj Das via cutting)
 
+45. HADOOP-928.  Make checksums optional per FileSystem.
+    (Hairong Kuang via cutting)
+
 
 Release 0.11.2 - 2007-02-16
 

+ 41 - 7
src/java/org/apache/hadoop/dfs/DFSClient.java

@@ -201,7 +201,7 @@ class DFSClient implements FSConstants {
      * inner subclass of InputStream that does the right out-of-band
      * work.
      */
-    public FSInputStream open(UTF8 src) throws IOException {
+    public DFSInputStream open(UTF8 src) throws IOException {
         checkOpen();
         //    Get block info from namenode
         return new DFSInputStream(src.toString());
@@ -215,7 +215,7 @@ class DFSClient implements FSConstants {
      * @return output stream
      * @throws IOException
      */
-    public FSOutputStream create( UTF8 src, 
+    public OutputStream create( UTF8 src, 
                                   boolean overwrite
                                 ) throws IOException {
       return create( src, overwrite, defaultReplication, defaultBlockSize, null);
@@ -230,7 +230,7 @@ class DFSClient implements FSConstants {
      * @return output stream
      * @throws IOException
      */
-    public FSOutputStream create( UTF8 src, 
+    public OutputStream create( UTF8 src, 
                                   boolean overwrite,
                                   Progressable progress
                                 ) throws IOException {
@@ -247,7 +247,7 @@ class DFSClient implements FSConstants {
      * @return output stream
      * @throws IOException
      */
-    public FSOutputStream create( UTF8 src, 
+    public OutputStream create( UTF8 src, 
                                   boolean overwrite, 
                                   short replication,
                                   long blockSize
@@ -266,14 +266,14 @@ class DFSClient implements FSConstants {
      * @return output stream
      * @throws IOException
      */
-    public FSOutputStream create( UTF8 src, 
+    public OutputStream create( UTF8 src, 
                                   boolean overwrite, 
                                   short replication,
                                   long blockSize,
                                   Progressable progress
                                 ) throws IOException {
       checkOpen();
-      FSOutputStream result = new DFSOutputStream(src, overwrite, 
+      OutputStream result = new DFSOutputStream(src, overwrite, 
                                                   replication, blockSize, progress);
       synchronized (pendingCreates) {
         pendingCreates.put(src.toString(), result);
@@ -931,11 +931,45 @@ class DFSClient implements FSConstants {
             throw new IOException("Mark not supported");
         }
     }
+    
+    static class DFSDataInputStream extends FSDataInputStream {
+      DFSDataInputStream(DFSInputStream in, Configuration conf)
+      throws IOException {
+        super(in, conf);
+      }
+      
+      DFSDataInputStream(DFSInputStream in, int bufferSize) throws IOException {
+        super(in, bufferSize);
+      }
+      
+      /**
+       * Returns the datanode from which the stream is currently reading.
+       */
+      public DatanodeInfo getCurrentDatanode() {
+        return ((DFSInputStream)inStream).getCurrentDatanode();
+      }
+      
+      /**
+       * Returns the block containing the target position. 
+       */
+      public Block getCurrentBlock() {
+        return ((DFSInputStream)inStream).getCurrentBlock();
+      }
+
+      /**
+       * Used by the automatic tests to determine the block locations
+       * of a file.
+       */
+      synchronized DatanodeInfo[][] getDataNodes() {
+        return ((DFSInputStream)inStream).getDataNodes();
+      }
+
+    }
 
     /****************************************************************
      * DFSOutputStream creates files from a stream of bytes.
      ****************************************************************/
-    class DFSOutputStream extends FSOutputStream {
+    class DFSOutputStream extends OutputStream {
         private Socket s;
         boolean closed = false;
 

+ 104 - 49
src/java/org/apache/hadoop/dfs/DistributedFileSystem.java

@@ -33,19 +33,21 @@ import org.apache.hadoop.util.*;
  *
  * @author Mike Cafarella
  *****************************************************************/
-public class DistributedFileSystem extends FileSystem {
-    private Path workingDir = 
-      new Path("/user", System.getProperty("user.name"));
-
+public class DistributedFileSystem extends ChecksumFileSystem {
+    private static class RawDistributedFileSystem extends FileSystem {
+    private Path workingDir =
+        new Path("/user", System.getProperty("user.name")); 
     private URI uri;
     private FileSystem localFs;
 
     DFSClient dfs;
 
-    public DistributedFileSystem() {}
+    public RawDistributedFileSystem() {
+    }
+
 
     /** @deprecated */
-    public DistributedFileSystem(InetSocketAddress namenode,
+    public RawDistributedFileSystem(InetSocketAddress namenode,
                                  Configuration conf) throws IOException {
       initialize(URI.create("hdfs://"+
                             namenode.getHostName()+":"+
@@ -119,24 +121,32 @@ public class DistributedFileSystem extends FileSystem {
       return dfs.getHints(getPath(f), start, len);
     }
 
-    public FSInputStream openRaw(Path f) throws IOException {
-      return dfs.open(getPath(f));
-    }
+    public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+      if (! exists(f)) {
+        throw new FileNotFoundException(f.toString());
+      }
 
-    public FSOutputStream createRaw(Path f, boolean overwrite, 
-                                    short replication, long blockSize)
-      throws IOException {
-      return dfs.create(getPath(f), overwrite, replication, blockSize);
+      return new DFSClient.DFSDataInputStream(dfs.open(getPath(f)), bufferSize);
     }
 
-    public FSOutputStream createRaw(Path f, boolean overwrite, 
-                                    short replication, long blockSize,
-                                    Progressable progress)
-      throws IOException {
-      return dfs.create(getPath(f), overwrite, replication, blockSize, progress);
+    public FSDataOutputStream create(Path f, boolean overwrite,
+            int bufferSize, short replication, long blockSize,
+            Progressable progress) throws IOException {
+      if (exists(f) && ! overwrite) {
+         throw new IOException("File already exists:"+f);
+      }
+      Path parent = f.getParent();
+      if (parent != null && !mkdirs(parent)) {
+        throw new IOException("Mkdirs failed to create " + parent);
+      }
+      
+      return new FSDataOutputStream(
+           dfs.create(getPath(f), overwrite,
+                   replication, blockSize, progress),
+           bufferSize);
     }
     
-    public boolean setReplicationRaw( Path src, 
+    public boolean setReplication( Path src, 
                                       short replication
                                     ) throws IOException {
       return dfs.setReplication(getPath(src), replication);
@@ -145,14 +155,14 @@ public class DistributedFileSystem extends FileSystem {
     /**
      * Rename files/dirs
      */
-    public boolean renameRaw(Path src, Path dst) throws IOException {
+    public boolean rename(Path src, Path dst) throws IOException {
       return dfs.rename(getPath(src), getPath(dst));
     }
 
     /**
      * Get rid of Path f, whether a true file or dir.
      */
-    public boolean deleteRaw(Path f) throws IOException {
+    public boolean delete(Path f) throws IOException {
         return dfs.delete(getPath(f));
     }
 
@@ -194,7 +204,7 @@ public class DistributedFileSystem extends FileSystem {
       return info[0].getReplication();
   }
 
-    public Path[] listPathsRaw(Path f) throws IOException {
+    public Path[] listPaths(Path f) throws IOException {
         DFSFileInfo info[] = dfs.listPaths(getPath(f));
         if (info == null) {
             return new Path[0];
@@ -221,16 +231,16 @@ public class DistributedFileSystem extends FileSystem {
         dfs.release(getPath(f));
     }
 
-    public void moveFromLocalFile(Path src, Path dst) throws IOException {
-      FileUtil.copy(localFs, src, this, dst, true, true, getConf());
-    }
-
-    public void copyFromLocalFile(Path src, Path dst) throws IOException {
-      FileUtil.copy(localFs, src, this, dst, false, true, getConf());
+    @Override
+    public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException {
+      FileUtil.copy(localFs, src, this, dst, delSrc, getConf());
     }
 
-    public void copyToLocalFile(Path src, Path dst, boolean copyCrc) throws IOException {
-      FileUtil.copy(this, src, localFs, dst, false, copyCrc, getConf());
+    @Override
+    public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException {
+      FileUtil.copy(this, src, localFs, dst, delSrc, getConf());
     }
 
     public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
@@ -257,8 +267,41 @@ public class DistributedFileSystem extends FileSystem {
 
     DFSClient getClient() {
         return dfs;
+    }        
+    /** Return the total raw capacity of the filesystem, disregarding
+     * replication .*/
+    public long getRawCapacity() throws IOException{
+        return dfs.totalRawCapacity();
+    }
+
+    /** Return the total raw used space in the filesystem, disregarding
+     * replication .*/
+    public long getRawUsed() throws IOException{
+        return dfs.totalRawUsed();
+    }
+
+    /** Return statistics for each datanode. */
+    public DatanodeInfo[] getDataNodeStats() throws IOException {
+      return dfs.datanodeReport();
     }
     
+    /**
+     * Enter, leave or get safe mode.
+     *  
+     * @see org.apache.hadoop.dfs.ClientProtocol#setSafeMode(FSConstants.SafeModeAction)
+     */
+    public boolean setSafeMode( FSConstants.SafeModeAction action ) 
+    throws IOException {
+      return dfs.setSafeMode( action );
+    }
+
+    /*
+     * Refreshes the list of hosts and excluded hosts from the configured 
+     * files.  
+     */
+    public void refreshNodes() throws IOException {
+      dfs.refreshNodes();
+    }
 
     /**
      * We need to find the blocks that didn't match.  Likely only one 
@@ -266,14 +309,14 @@ public class DistributedFileSystem extends FileSystem {
      * we can consider figuring out exactly which block is corrupt.
      */
     public void reportChecksumFailure(Path f, 
-                                      FSInputStream in, long inPos, 
-                                      FSInputStream sums, long sumsPos) {
+                                      FSDataInputStream in, long inPos, 
+                                      FSDataInputStream sums, long sumsPos) {
       
       LocatedBlock lblocks[] = new LocatedBlock[2];
 
       try {
         // Find block in data stream.
-        DFSClient.DFSInputStream dfsIn = (DFSClient.DFSInputStream) in;
+        DFSClient.DFSDataInputStream dfsIn = (DFSClient.DFSDataInputStream) in;
         Block dataBlock = dfsIn.getCurrentBlock();
         if (dataBlock == null) {
           throw new IOException("Error: Current block in data stream is null! ");
@@ -284,7 +327,7 @@ public class DistributedFileSystem extends FileSystem {
                  " on datanode=" + dataNode[0].getName());
 
         // Find block in checksum stream
-        DFSClient.DFSInputStream dfsSums = (DFSClient.DFSInputStream) sums;
+        DFSClient.DFSDataInputStream dfsSums = (DFSClient.DFSDataInputStream) sums;
         Block sumsBlock = dfsSums.getCurrentBlock();
         if (sumsBlock == null) {
           throw new IOException("Error: Current block in checksum stream is null! ");
@@ -305,32 +348,33 @@ public class DistributedFileSystem extends FileSystem {
       }
 
     }
+    }
+
+    public DistributedFileSystem() {
+        super( new RawDistributedFileSystem() );
+    }
+
+    /** @deprecated */
+    public DistributedFileSystem(InetSocketAddress namenode,
+                                 Configuration conf) throws IOException {
+      super( new RawDistributedFileSystem(namenode, conf) );
+    }
 
     /** Return the total raw capacity of the filesystem, disregarding
      * replication .*/
     public long getRawCapacity() throws IOException{
-        return dfs.totalRawCapacity();
+        return ((RawDistributedFileSystem)fs).getRawCapacity();
     }
 
     /** Return the total raw used space in the filesystem, disregarding
      * replication .*/
     public long getRawUsed() throws IOException{
-        return dfs.totalRawUsed();
-    }
-
-    /** Return the total size of all files in the filesystem.*/
-    public long getUsed()throws IOException{
-        long used = 0;
-        DFSFileInfo dfsFiles[] = dfs.listPaths(getPath(new Path("/")));
-        for(int i=0;i<dfsFiles.length;i++){
-            used += dfsFiles[i].getContentsLen();
-        }
-        return used;
+        return ((RawDistributedFileSystem)fs).getRawUsed();
     }
 
     /** Return statistics for each datanode. */
     public DatanodeInfo[] getDataNodeStats() throws IOException {
-      return dfs.datanodeReport();
+      return ((RawDistributedFileSystem)fs).getDataNodeStats();
     }
     
     /**
@@ -340,7 +384,7 @@ public class DistributedFileSystem extends FileSystem {
      */
     public boolean setSafeMode( FSConstants.SafeModeAction action ) 
     throws IOException {
-      return dfs.setSafeMode( action );
+      return ((RawDistributedFileSystem)fs).setSafeMode( action );
     }
 
     /*
@@ -348,6 +392,17 @@ public class DistributedFileSystem extends FileSystem {
      * files.  
      */
     public void refreshNodes() throws IOException {
-      dfs.refreshNodes();
+      ((RawDistributedFileSystem)fs).refreshNodes();
+    }
+    /**
+     * We need to find the blocks that didn't match.  Likely only one 
+     * is corrupt but we will report both to the namenode.  In the future,
+     * we can consider figuring out exactly which block is corrupt.
+     */
+    public void reportChecksumFailure(Path f, 
+                                      FSDataInputStream in, long inPos, 
+                                      FSDataInputStream sums, long sumsPos) {
+      ((RawDistributedFileSystem)fs).reportChecksumFailure(
+                f, in, inPos, sums, sumsPos);
     }
 }
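
The rewrite is a textbook decorator: the old DistributedFileSystem body becomes the private RawDistributedFileSystem, and the public class forwards DFS-only operations by casting the wrapped `fs` field. The same recipe works for any filesystem that wants client-side checksums; a sketch with hypothetical names, where MyRawFileSystem stands for a complete checksum-free FileSystem implementation:

    import java.io.IOException;
    import org.apache.hadoop.fs.ChecksumFileSystem;

    public class MyChecksummedFileSystem extends ChecksumFileSystem {

      public MyChecksummedFileSystem() {
        super(new MyRawFileSystem());   // assumed raw implementation
      }

      // Filesystem-specific extras are forwarded to the wrapped raw
      // layer, mirroring what DistributedFileSystem does above.
      public long getRawCapacity() throws IOException {
        return ((MyRawFileSystem) fs).getRawCapacity();
      }
    }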

+ 3 - 3
src/java/org/apache/hadoop/dfs/NamenodeFsck.java

@@ -22,6 +22,7 @@ import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.net.InetSocketAddress;
 import java.net.Socket;
@@ -34,7 +35,6 @@ import javax.servlet.http.HttpServletResponse;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.net.DNS;
 
@@ -255,7 +255,7 @@ public class NamenodeFsck {
       }
       // create chains
       int chain = 0;
-      FSOutputStream fos = null;
+      OutputStream fos = null;
       for (int i = 0; i < blocks.length; i++) {
         LocatedBlock lblock = blocks[i];
         DatanodeInfo[] locs = lblock.getLocations();
@@ -305,7 +305,7 @@ public class NamenodeFsck {
    * around.
    */
       private void copyBlock(DFSClient dfs, LocatedBlock lblock,
-          FSOutputStream fos) throws Exception {
+          OutputStream fos) throws Exception {
     int failures = 0;
     InetSocketAddress targetAddr = null;
     TreeSet deadNodes = new TreeSet();

+ 0 - 1
src/java/org/apache/hadoop/dfs/TransferFsImage.java

@@ -31,7 +31,6 @@ import javax.servlet.http.HttpServletRequest;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.io.UTF8;
 
 

+ 14 - 3
src/java/org/apache/hadoop/filecache/DistributedCache.java

@@ -288,6 +288,10 @@ public class DistributedCache {
     byte[] digest = null;
 
     FileSystem fileSystem = getFileSystem(cache, conf);
+    if(!(fileSystem instanceof ChecksumFileSystem)) {
+        throw new IOException( "Not a checksummed file system: "
+                +fileSystem.getUri() );
+    }
     String filename = cache.getPath();
     Path filePath = new Path(filename);
     Path md5File = new Path(filePath.getParent().toString() + Path.SEPARATOR
@@ -299,8 +303,15 @@ public class DistributedCache {
       // do nothing
     }
     if (!fileSystem.exists(md5File)) {
-      FSInputStream fsStream = fileSystem.openRaw(FileSystem
-          .getChecksumFile(filePath));
+      ChecksumFileSystem checksumFs;
+      if(!(fileSystem instanceof ChecksumFileSystem)) {
+          throw new IOException(
+                  "Not a checksumed file system: "+fileSystem.getUri());
+      } else {
+          checksumFs = (ChecksumFileSystem)fileSystem;
+      }
+      FSDataInputStream fsStream = checksumFs.getRawFileSystem().open(
+              checksumFs.getChecksumFile(filePath));
       int read = fsStream.read(b);
       while (read != -1) {
         md5.update(b, 0, read);
@@ -313,7 +324,7 @@ public class DistributedCache {
       out.write(digest);
       out.close();
     } else {
-      FSInputStream fsStream = fileSystem.openRaw(md5File);
+      FSDataInputStream fsStream = fileSystem.open(md5File);
       digest = new byte[md5.getDigestLength()];
       // assuming reading 16 bytes once is not a problem
       // though it should be checked if 16 bytes have been read or not
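
For reference, the naming convention behind getChecksumFile() (defined in ChecksumFileSystem below): the checksum lives in a hidden sibling file. A tiny sketch with a hypothetical path:

    Path file = new Path("/user/data/part-0");          // hypothetical
    Path crc  = checksumFs.getChecksumFile(file);       // /user/data/.part-0.crc
    boolean b = ChecksumFileSystem.isChecksumFile(crc); // true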

+ 629 - 0
src/java/org/apache/hadoop/fs/ChecksumFileSystem.java

@@ -0,0 +1,629 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.*;
+import java.util.Arrays;
+import java.util.zip.CRC32;
+import java.util.zip.Checksum;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.StringUtils;
+
+/****************************************************************
+ * Abstract checksummed FileSystem.
+ * It provides a basic implementation of a checksummed FileSystem,
+ * which creates a checksum file for each raw file.
+ * It generates & verifies checksums at the client side.
+ *
+ * @author Hairong Kuang
+ *****************************************************************/
+public abstract class ChecksumFileSystem extends FilterFileSystem {
+  private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
+
+  public ChecksumFileSystem(FileSystem fs) {
+    super(fs);
+  }
+
+  /** get the raw file system */
+  public FileSystem getRawFileSystem() {
+    return fs;
+  }
+
+  /** Return the name of the checksum file associated with a file.*/
+  public Path getChecksumFile(Path file) {
+    return new Path(file.getParent(), "." + file.getName() + ".crc");
+  }
+
+  /** Return true iff file is a checksum file name.*/
+  public static boolean isChecksumFile(Path file) {
+    String name = file.getName();
+    return name.startsWith(".") && name.endsWith(".crc");
+  }
+
+  /** Return the length of the checksum file given the size of the 
+   * actual file.
+   **/
+  public long getChecksumFileLength(Path file, long fileSize) {
+    return FSOutputSummer.getChecksumLength(fileSize, getBytesPerSum());
+  }
+
+  /** Return the bytes Per Checksum */
+  public int getBytesPerSum() {
+    return getConf().getInt("io.bytes.per.checksum", 512);
+  }
+
+  private int getSumBufferSize(int bytesPerSum, int bufferSize) {
+    int defaultBufferSize = getConf().getInt("io.file.buffer.size", 4096);
+    int proportionalBufferSize = bufferSize / bytesPerSum;
+    return Math.max(bytesPerSum,
+                    Math.max(proportionalBufferSize, defaultBufferSize));
+  }
+
+  /*******************************************************
+   * For open()'s FSInputStream
+   * It verifies that data matches checksums.
+   *******************************************************/
+  private static class FSInputChecker extends FSInputStream {
+    public static final Log LOG 
+      = LogFactory.getLog("org.apache.hadoop.fs.FSInputChecker");
+    
+    private ChecksumFileSystem fs;
+    private Path file;
+    private FSDataInputStream datas;
+    private FSDataInputStream sums;
+    private Checksum sum = new CRC32();
+    private int inSum;
+    
+    private static final int HEADER_LENGTH = 8;
+    
+    private int bytesPerSum = 1;
+    
+    public FSInputChecker(ChecksumFileSystem fs, Path file)
+      throws IOException {
+      this(fs, file, fs.getConf().getInt("io.file.buffer.size", 4096));
+    }
+    
+    public FSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
+      throws IOException {
+      // open with an extremely small buffer size,
+      // so that this buffer is effectively bypassed by the buffer in FSDataInputStream
+      datas = fs.getRawFileSystem().open(file, 1);
+      this.fs = fs;
+      this.file = file;
+      Path sumFile = fs.getChecksumFile(file);
+      try {
+        int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(),bufferSize);
+        sums = fs.getRawFileSystem().open(sumFile, sumBufferSize);
+
+        byte[] version = new byte[CHECKSUM_VERSION.length];
+        sums.readFully(version);
+        if (!Arrays.equals(version, CHECKSUM_VERSION))
+          throw new IOException("Not a checksum file: "+sumFile);
+        bytesPerSum = sums.readInt();
+      } catch (FileNotFoundException e) {         // quietly ignore
+        stopSumming();
+      } catch (IOException e) {                   // loudly ignore
+        LOG.warn("Problem opening checksum file: "+ file + 
+                 ".  Ignoring exception: " + 
+                 StringUtils.stringifyException(e));
+        stopSumming();
+      }
+    }
+
+    public void seek(long desired) throws IOException {
+      // seek to a checksum boundary
+      long checksumBoundary = desired/bytesPerSum*bytesPerSum;
+      if(checksumBoundary != getPos()) {
+        datas.seek(checksumBoundary);
+        sums.seek(HEADER_LENGTH + 4*(checksumBoundary/bytesPerSum));
+      }
+      
+      sum.reset();
+      inSum = 0;
+      
+      // scan to desired position
+      int delta = (int)(desired - checksumBoundary);
+      readBuffer(new byte[delta], 0, delta);
+    }
+    
+    public int read() throws IOException {
+      byte[] b = new byte[1];
+      readBuffer(b, 0, 1);
+      return b[0] & 0xff;
+    }
+
+    public int read(byte b[]) throws IOException {
+      return read(b, 0, b.length);
+    }
+
+    public int read(byte b[], int off, int len) throws IOException {
+      // make sure that it ends at a checksum boundary
+      long curPos = getPos();
+      long endPos = len+curPos/bytesPerSum*bytesPerSum;
+      return readBuffer(b, off, (int)(endPos-curPos));
+    }
+    
+    private int readBuffer(byte b[], int off, int len) throws IOException {
+      int read;
+      boolean retry;
+      int retriesLeft = 3;
+      long oldPos = getPos();
+      do {
+        retriesLeft--;
+        retry = false;
+        
+        read = 0;
+        boolean endOfFile=false;
+        while (read < len && !endOfFile) {
+          int count = datas.read(b, off + read, len - read);
+          if (count < 0)
+            endOfFile = true;
+          else
+            read += count;
+        }
+        
+        if (sums != null && read!=0) {
+          long oldSumsPos = sums.getPos();
+          try {
+            int summed = 0;
+            while (summed < read) {
+              int goal = bytesPerSum - inSum;
+              int inBuf = read - summed;
+              int toSum = inBuf <= goal ? inBuf : goal;
+              
+              try {
+                sum.update(b, off+summed, toSum);
+              } catch (ArrayIndexOutOfBoundsException e) {
+                throw new RuntimeException("Summer buffer overflow b.len=" + 
+                                           b.length + ", off=" + off + 
+                                           ", summed=" + summed + ", read=" + 
+                                           read + ", bytesPerSum=" + bytesPerSum +
+                                           ", inSum=" + inSum, e);
+              }
+              summed += toSum;
+              
+              inSum += toSum;
+              if (inSum == bytesPerSum || endOfFile) {
+                verifySum(read-(summed-bytesPerSum));
+              }
+            }
+          } catch (ChecksumException ce) {
+            LOG.info("Found checksum error: "+StringUtils.stringifyException(ce));
+            if (retriesLeft == 0) {
+              throw ce;
+            }
+            
+            sums.seek(oldSumsPos);
+            datas.seek(oldPos);
+            
+            if (seekToNewSource(oldPos)) {
+              // Since at least one of the sources is now different,
+              // the read might succeed, so we'll retry.
+              retry = true;
+            } else {
+              // Neither the data stream nor the checksum stream could be
+              // switched to a different source, so a retry would just hit
+              // the same checksum error.  We throw the exception instead.
+              throw ce;
+            }
+          }
+        }
+      } while (retry);
+      return read==0?-1:read;
+    }
+    
+    private void verifySum(int delta) throws IOException {
+      int crc;
+      try {
+        crc = sums.readInt();
+      } catch (IOException e) {
+        LOG.warn("Problem reading checksum file: "+e+". Ignoring.");
+        stopSumming();
+        return;
+      }
+      int sumValue = (int)sum.getValue();
+      sum.reset();
+      inSum = 0;
+      if (crc != sumValue) {
+        long pos = getPos() - delta;
+        fs.reportChecksumFailure(file, datas, pos, sums, pos/bytesPerSum);
+        throw new ChecksumException("Checksum error: "+file+" at "+pos);
+      }
+    }
+    
+    public long getPos() throws IOException {
+      return datas.getPos();
+    }
+    
+    public int read(long position, byte[] buffer, int offset, int length)
+      throws IOException {
+      return datas.read(position, buffer, offset, length);
+    }
+    
+    public void readFully(long position, byte[] buffer, int offset, int length)
+      throws IOException {
+      datas.readFully(position, buffer, offset, length);
+    }
+    
+    public void readFully(long position, byte[] buffer)
+      throws IOException {
+      datas.readFully(position, buffer);
+    }
+    
+    public void close() throws IOException {
+      datas.close();
+      stopSumming();
+    }
+    
+    private void stopSumming() {
+      if (sums != null) {
+        try {
+          sums.close();
+        } catch (IOException f) {}
+        sums = null;
+        bytesPerSum = 1;
+      }
+    }
+    
+    public int available() throws IOException {
+      return datas.available();
+    }
+    
+    public boolean markSupported() {
+      return datas.markSupported();
+    }
+    
+    public synchronized void mark(int readlimit) {
+      datas.mark(readlimit);
+    }
+    
+    public synchronized void reset() throws IOException {
+      datas.reset();
+    }
+    
+    public long skip(long n) throws IOException {
+      return datas.skip(n);
+    }
+
+    @Override
+      public boolean seekToNewSource(long targetPos) throws IOException {
+      return datas.seekToNewSource(targetPos) &&
+        sums.seekToNewSource(targetPos/bytesPerSum);
+    }
+
+  }
+
+  /**
+   * Opens an FSDataInputStream at the indicated Path.
+   * @param f the file name to open
+   * @param bufferSize the size of the buffer to be used.
+   */
+  @Override
+    public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+    if (!exists(f)) {
+      throw new FileNotFoundException(f.toString());
+    }
+    return new FSDataInputStream(new FSInputChecker(this, f, bufferSize),
+                                 bufferSize);
+  }
+
+  /** This class provides an output stream for a checksummed file.
+   * It generates checksums for data. */
+  private static class FSOutputSummer extends FilterOutputStream {
+    
+    private FSDataOutputStream sums;
+    private Checksum sum = new CRC32();
+    private int inSum;
+    private int bytesPerSum;
+    
+    public FSOutputSummer(ChecksumFileSystem fs, 
+                          Path file, 
+                          boolean overwrite, 
+                          short replication,
+                          long blockSize,
+                          Configuration conf)
+      throws IOException {
+      this(fs, file, overwrite, 
+           conf.getInt("io.file.buffer.size", 4096),
+           replication, blockSize, null);
+    }
+    
+    public FSOutputSummer(ChecksumFileSystem fs, 
+                          Path file, 
+                          boolean overwrite,
+                          int bufferSize,
+                          short replication,
+                          long blockSize,
+                          Progressable progress)
+      throws IOException {
+      super(fs.getRawFileSystem().create(file, overwrite, 1, 
+                                         replication, blockSize, progress));
+      this.bytesPerSum = fs.getBytesPerSum();
+      int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize);
+      this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true, 
+                                               sumBufferSize, replication,
+                                               blockSize);
+      sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
+      sums.writeInt(this.bytesPerSum);
+    }
+    
+    public void write(byte b[], int off, int len) throws IOException {
+      int summed = 0;
+      while (summed < len) {
+        
+        int goal = this.bytesPerSum - inSum;
+        int inBuf = len - summed;
+        int toSum = inBuf <= goal ? inBuf : goal;
+        
+        sum.update(b, off+summed, toSum);
+        summed += toSum;
+        
+        inSum += toSum;
+        if (inSum == this.bytesPerSum) {
+          writeSum();
+        }
+      }
+      
+      out.write(b, off, len);
+    }
+    
+    private void writeSum() throws IOException {
+      if (inSum != 0) {
+        sums.writeInt((int)sum.getValue());
+        sum.reset();
+        inSum = 0;
+      }
+    }
+    
+    public void close() throws IOException {
+      writeSum();
+      sums.close();
+      super.close();
+    }
+    
+    public static long getChecksumLength(long size, int bytesPerSum) {
+      //the checksum length is equal to size passed divided by bytesPerSum +
+      //bytes written in the beginning of the checksum file.  
+      return ((long)(Math.ceil((float)size/bytesPerSum)) + 1) * 4 + 
+        CHECKSUM_VERSION.length;  
+    }
+  }
+
+  /**
+   * Opens an FSDataOutputStream at the indicated Path with write-progress
+   * reporting.
+   * @param f the file name to open
+   * @param overwrite if a file with this name already exists, then if true,
+   *   the file will be overwritten, and if false an error will be thrown.
+   * @param bufferSize the size of the buffer to be used.
+   * @param replication required block replication for the file. 
+   */
+  @Override
+    public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize,
+                                     short replication, long blockSize, Progressable progress)
+    throws IOException {
+    if (exists(f) && !overwrite) {
+      throw new IOException("File already exists:" + f);
+    }
+    Path parent = f.getParent();
+    if (parent != null && !mkdirs(parent)) {
+      throw new IOException("Mkdirs failed to create " + parent);
+    }
+    return new FSDataOutputStream(new FSOutputSummer(this, f, overwrite,
+                                                     bufferSize, replication, blockSize, progress), bufferSize);
+  }
+
+  /**
+   * Set replication for an existing file.
+   * Implement the abstract <tt>setReplication</tt> of <tt>FileSystem</tt>
+   * @param src file name
+   * @param replication new replication
+   * @throws IOException
+   * @return true if successful;
+   *         false if file does not exist or is a directory
+   */
+  public boolean setReplication(Path src, short replication) throws IOException {
+    boolean value = fs.setReplication(src, replication);
+    if (!value)
+      return false;
+
+    Path checkFile = getChecksumFile(src);
+    if (exists(checkFile))
+      fs.setReplication(checkFile, replication);
+
+    return true;
+  }
+
+  /**
+   * Rename files/dirs
+   */
+  public boolean rename(Path src, Path dst) throws IOException {
+    if (fs.isDirectory(src)) {
+      return fs.rename(src, dst);
+    } else {
+
+      boolean value = fs.rename(src, dst);
+      if (!value)
+        return false;
+
+      Path checkFile = getChecksumFile(src);
+      if (fs.exists(checkFile)) { //try to rename checksum
+        if (fs.isDirectory(dst)) {
+          value = fs.rename(checkFile, dst);
+        } else {
+          value = fs.rename(checkFile, getChecksumFile(dst));
+        }
+      }
+
+      return value;
+    }
+  }
+
+  /**
+   * Get rid of Path f, whether a true file or dir.
+   */
+  public boolean delete(Path f) throws IOException {
+    if (fs.isDirectory(f)) {
+      return fs.delete(f);
+    } else {
+      Path checkFile = getChecksumFile(f);
+      if(fs.exists(checkFile)) {
+        fs.delete(checkFile);
+      }
+
+      return fs.delete(f);
+    }
+  }
+
+  final private static PathFilter DEFAULT_FILTER = new PathFilter() {
+      public boolean accept(Path file) {
+        return !isChecksumFile(file);
+      }
+    };
+
+  /** 
+   * Filter raw files in the given paths using the default checksum filter.
+   * @param files a list of paths
+   * @return a list of files under the source paths
+   * @exception IOException
+   */
+  @Override
+    public Path[] listPaths(Path[] files) throws IOException {
+    return fs.listPaths(files, DEFAULT_FILTER);
+  }
+
+  /** 
+   * Filter raw files in the given path using the default checksum filter. 
+   * @param f source path
+   * @return a list of files under the source path
+   * @exception IOException
+   */
+  public Path[] listPaths(Path f) throws IOException {
+    return fs.listPaths(f, DEFAULT_FILTER);
+  }
+
+  @Override
+    public boolean mkdirs(Path f) throws IOException {
+    return fs.mkdirs(f);
+  }
+
+  @Override
+    public void lock(Path f, boolean shared) throws IOException {
+    if (fs.isDirectory(f)) {
+      fs.lock(f, shared);
+    } else {
+      Path checkFile = getChecksumFile(f);
+      if(fs.exists(checkFile)) {
+        fs.lock(checkFile, shared);
+      }
+      fs.lock(f, shared);
+    }
+  }
+
+  @Override
+    public void release(Path f) throws IOException {
+    if (fs.isDirectory(f)) {
+      fs.release(f);
+    } else {
+      Path checkFile = getChecksumFile(f);
+      if(fs.exists(checkFile)) {
+        fs.release(getChecksumFile(f));
+      }
+      fs.release(f);
+    }
+  }
+
+  @Override
+    public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException {
+    FileSystem localFs = getNamed("file:///", getConf());
+    FileUtil.copy(localFs, src, this, dst, delSrc, getConf());
+  }
+
+  /**
+   * The src file is under FS, and the dst is on the local disk.
+   * Copy it from FS control to the local dst name.
+   */
+  @Override
+    public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException {
+    FileSystem localFs = getNamed("file:///", getConf());
+    FileUtil.copy(this, src, localFs, dst, delSrc, getConf());
+  }
+
+  /**
+   * The src file is under FS, and the dst is on the local disk.
+   * Copy it from FS control to the local dst name.
+   * If src and dst are directories, the copyCrc parameter
+   * determines whether to copy CRC files.
+   */
+  public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
+    throws IOException {
+    if (!fs.isDirectory(src)) { // source is a file
+      fs.copyToLocalFile(src, dst);
+      FileSystem localFs = getNamed("file:///", getConf());
+      if (localFs instanceof ChecksumFileSystem) {
+        localFs = ((ChecksumFileSystem) localFs).getRawFileSystem();
+      }
+      if (localFs.isDirectory(dst)) {
+        dst = new Path(dst, src.getName());
+      }
+      dst = getChecksumFile(dst);
+      if (localFs.exists(dst)) { //remove old local checksum file
+        localFs.delete(dst);
+      }
+      Path checksumFile = getChecksumFile(src);
+      if (copyCrc && fs.exists(checksumFile)) { //copy checksum file
+        fs.copyToLocalFile(checksumFile, dst);
+      }
+    } else {
+      Path[] srcs = listPaths(src);
+      for (Path srcFile : srcs) {
+        copyToLocalFile(srcFile, dst, copyCrc);
+      }
+    }
+  }
+
+  @Override
+    public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+    throws IOException {
+    return tmpLocalFile;
+  }
+
+  @Override
+    public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+    throws IOException {
+    moveFromLocalFile(tmpLocalFile, fsOutputFile);
+  }
+
+  /**
+   * Report a checksum error to the file system.
+   * @param f the file name containing the error
+   * @param in the stream open on the file
+   * @param inPos the position of the beginning of the bad data in the file
+   * @param sums the stream open on the checksum file
+   * @param sumsPos the position of the beginning of the bad data in the checksum file
+   */
+  public abstract void reportChecksumFailure(Path f, FSDataInputStream in,
+                                             long inPos, FSDataInputStream sums, long sumsPos);
+}
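
Taken together, a .crc file is an 8-byte header (the 4-byte "crc\0" version magic plus a 4-byte bytesPerSum int, matching HEADER_LENGTH above) followed by one 4-byte CRC-32 per bytesPerSum bytes of data. A worked example of the length formula, assuming the default of 512 bytes per checksum and the `checksumFs`/`file` names from the earlier sketch:

    // 2000-byte file, bytesPerSum = 512:
    //   ceil(2000 / 512) = 4 chunk CRCs      -> 4 * 4 = 16 bytes
    //   + 4 bytes for the stored bytesPerSum  (the "+1" in the formula)
    //   + 4 bytes for the "crc\0" magic       (CHECKSUM_VERSION.length)
    //   = 24 bytes
    long crcLen = checksumFs.getChecksumFileLength(file, 2000L);  // 24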

+ 16 - 222
src/java/org/apache/hadoop/fs/FSDataInputStream.java

@@ -18,196 +18,19 @@
 package org.apache.hadoop.fs;
 
 import java.io.*;
-import java.util.Arrays;
-import java.util.zip.*;
-
-import org.apache.commons.logging.*;
 
 import org.apache.hadoop.conf.*;
-import org.apache.hadoop.util.StringUtils;
 
 /** Utility that wraps a {@link FSInputStream} in a {@link DataInputStream}
  * and buffers input through a {@link BufferedInputStream}. */
 public class FSDataInputStream extends DataInputStream
     implements Seekable, PositionedReadable {
-  private static final Log LOG =
-    LogFactory.getLog("org.apache.hadoop.fs.DataInputStream");
-
-  private static final byte[] VERSION = FSDataOutputStream.CHECKSUM_VERSION;
-  private static final int HEADER_LENGTH = 8;
-  
-  private int bytesPerSum = 1;
-  
-  /** Verify that data matches checksums. */
-  private class Checker extends FilterInputStream
-      implements Seekable, PositionedReadable {
-    private FileSystem fs;
-    private Path file;
-    private FSDataInputStream sums;
-    private Checksum sum = new CRC32();
-    private int inSum;
-    private FSInputStream sumsIn;
-
-    public Checker(FileSystem fs, Path file, Configuration conf)
-      throws IOException {
-      super(fs.openRaw(file));
-      
-      this.fs = fs;
-      this.file = file;
-      Path sumFile = FileSystem.getChecksumFile(file);
-      try {
-        sumsIn = fs.openRaw(sumFile);
-        this.sums = new FSDataInputStream(sumsIn, conf);
-        byte[] version = new byte[VERSION.length];
-        sums.readFully(version);
-        if (!Arrays.equals(version, VERSION))
-          throw new IOException("Not a checksum file: "+sumFile);
-        bytesPerSum = sums.readInt();
-      } catch (FileNotFoundException e) {         // quietly ignore
-        stopSumming();
-      } catch (IOException e) {                   // loudly ignore
-        LOG.warn("Problem opening checksum file: "+ file + 
-                 ".  Ignoring exception: " + 
-                 StringUtils.stringifyException(e));
-        stopSumming();
-      }
-    }
-
-    public void seek(long desired) throws IOException {
-      ((Seekable)in).seek(desired);
-      if (sums != null) {
-        if (desired % bytesPerSum != 0)
-          throw new IOException("Seek to non-checksummed position.");
-        try {
-          sums.seek(HEADER_LENGTH + 4*(desired/bytesPerSum));
-        } catch (IOException e) {
-          LOG.warn("Problem seeking checksum file: "+e+". Ignoring.");
-          stopSumming();
-        }
-        sum.reset();
-        inSum = 0;
-      }
-    }
-    
-    public int read(byte b[], int off, int len) throws IOException {
-      int read;
-      boolean retry;
-      int retriesLeft = 3;
-      long oldPos = getPos();
-      do {
-        retriesLeft--;
-        retry = false;
-
-        read = in.read(b, off, len);
-        
-        if (sums != null) {
-          long oldSumsPos = sums.getPos();
-          try {
-            int summed = 0;
-            while (summed < read) {
-              int goal = bytesPerSum - inSum;
-              int inBuf = read - summed;
-              int toSum = inBuf <= goal ? inBuf : goal;
-          
-              try {
-                sum.update(b, off+summed, toSum);
-              } catch (ArrayIndexOutOfBoundsException e) {
-                throw new RuntimeException("Summer buffer overflow b.len=" + 
-                                           b.length + ", off=" + off + 
-                                           ", summed=" + summed + ", read=" + 
-                                           read + ", bytesPerSum=" + bytesPerSum +
-                                           ", inSum=" + inSum, e);
-              }
-              summed += toSum;
-          
-              inSum += toSum;
-              if (inSum == bytesPerSum) {
-                verifySum(read-(summed-bytesPerSum));
-              }
-            }
-          } catch (ChecksumException ce) {
-            LOG.info("Found checksum error: " + StringUtils.stringifyException(ce));
-            if (retriesLeft == 0) {
-              throw ce;
-            }
-            sums.seek(oldSumsPos);
-            if (!((FSInputStream)in).seekToNewSource(oldPos) ||
-                !((FSInputStream)sumsIn).seekToNewSource(oldSumsPos)) {
-              // Neither the data stream nor the checksum stream are being read from
-              // different sources, meaning we'll still get a checksum error if we 
-              // try to do the read again.  We throw an exception instead.
-              throw ce;
-            } else {
-              // Since at least one of the sources is different, the read might succeed,
-              // so we'll retry.
-              retry = true;
-            }
-          }
-        }
-      } while (retry);
-      return read;
-    }
-
-    private void verifySum(int delta) throws IOException {
-      int crc;
-      try {
-        crc = sums.readInt();
-      } catch (IOException e) {
-        LOG.warn("Problem reading checksum file: "+e+". Ignoring.");
-        stopSumming();
-        return;
-      }
-      int sumValue = (int)sum.getValue();
-      sum.reset();
-      inSum = 0;
-      if (crc != sumValue) {
-        long pos = getPos() - delta;
-        fs.reportChecksumFailure(file, (FSInputStream)in,
-                                 pos, sumsIn, pos/bytesPerSum) ;
-        throw new ChecksumException("Checksum error: "+file+" at "+pos);
-      }
-    }
-
-    public long getPos() throws IOException {
-      return ((FSInputStream)in).getPos();
-    }
-
-    public int read(long position, byte[] buffer, int offset, int length)
-    throws IOException {
-      return ((FSInputStream)in).read(position, buffer, offset, length);
-    }
-    
-    public void readFully(long position, byte[] buffer, int offset, int length)
-    throws IOException {
-      ((FSInputStream)in).readFully(position, buffer, offset, length);
-    }
-    
-    public void readFully(long position, byte[] buffer)
-    throws IOException {
-      ((FSInputStream)in).readFully(position, buffer);
-    }
-
-    public void close() throws IOException {
-      super.close();
-      stopSumming();
-    }
-
-    private void stopSumming() {
-      if (sums != null) {
-        try {
-          sums.close();
-        } catch (IOException f) {}
-        sums = null;
-        bytesPerSum = 1;
-      }
-    }
-  }
 
   /** Cache the file position.  This improves performance significantly.*/
   private static class PositionCache extends FilterInputStream {
     long position;
 
-    public PositionCache(InputStream in) throws IOException {
+    public PositionCache(FSInputStream in) throws IOException {
       super(in);
     }
 
@@ -221,7 +44,7 @@ public class FSDataInputStream extends DataInputStream
     }
 
     public void seek(long desired) throws IOException {
-      ((Seekable)in).seek(desired);               // seek underlying stream
+      ((FSInputStream)in).seek(desired);          // seek underlying stream
       position = desired;                         // update position
     }
       
@@ -231,18 +54,17 @@ public class FSDataInputStream extends DataInputStream
     
     public int read(long position, byte[] buffer, int offset, int length)
     throws IOException {
-      return ((PositionedReadable)in).read(position, buffer, offset, length);
+      return ((FSInputStream)in).read(position, buffer, offset, length);
     }
     
     public void readFully(long position, byte[] buffer, int offset, int length)
     throws IOException {
-      ((PositionedReadable)in).readFully(position, buffer, offset, length);
+      ((FSInputStream)in).readFully(position, buffer, offset, length);
     }
-    
   }
 
   /** Buffer input.  This improves performance significantly.*/
-  private class Buffer extends BufferedInputStream {
+  private static class Buffer extends BufferedInputStream {
     public Buffer(PositionCache in, int bufferSize)
       throws IOException {
       super(in, bufferSize);
@@ -257,18 +79,8 @@ public class FSDataInputStream extends DataInputStream
       } else {
         this.count = 0;                           // invalidate buffer
         this.pos = 0;
-
-        long delta = desired % bytesPerSum;
-        
-        // seek to last checksummed point, if any
-        ((PositionCache)in).seek(desired - delta);
-
-        // scan to desired position
-        for (int i = 0; i < delta; i++) {
-          read();
-        }
+        ((PositionCache)in).seek(desired);
       }
-
     }
       
     public long getPos() throws IOException {     // adjust for buffer
@@ -291,40 +103,18 @@ public class FSDataInputStream extends DataInputStream
     throws IOException {
       ((PositionCache)in).readFully(position, buffer, offset, length);
     }
-}
-  
-  
-  public FSDataInputStream(FileSystem fs, Path file, int bufferSize, Configuration conf)
-      throws IOException {
-    super(null);
-    Checker chkr = new Checker(fs, file, conf);  // sets bytesPerSum
-    if (bufferSize % bytesPerSum != 0) {
-      throw new IOException("Buffer size must be multiple of " + bytesPerSum);
-    }
-    this.in = new Buffer(new PositionCache(chkr), bufferSize);
   }
+
+  protected FSInputStream inStream;
   
-  
-  public FSDataInputStream(FileSystem fs, Path file, Configuration conf)
-    throws IOException {
-    super(null);
-    int bufferSize = conf.getInt("io.file.buffer.size", 4096);
-    Checker chkr = new Checker(fs, file, conf);
-    if (bufferSize % bytesPerSum != 0) {
-      throw new IOException("Buffer size must be multiple of " + bytesPerSum);
-    }
-    this.in = new Buffer(new PositionCache(chkr), bufferSize);
-  }
-    
-  /** Construct without checksums. */
   public FSDataInputStream(FSInputStream in, Configuration conf) throws IOException {
     this(in, conf.getInt("io.file.buffer.size", 4096));
   }
-  /** Construct without checksums. */
+  
   public FSDataInputStream(FSInputStream in, int bufferSize)
     throws IOException {
-    super(null);
-    this.in = new Buffer(new PositionCache(in), bufferSize);
+    super( new Buffer(new PositionCache(in), bufferSize) );
+    this.inStream = in;
   }
   
   public synchronized void seek(long desired) throws IOException {
@@ -334,7 +124,7 @@ public class FSDataInputStream extends DataInputStream
   public long getPos() throws IOException {
     return ((Buffer)in).getPos();
   }
-
+  
   public int read(long position, byte[] buffer, int offset, int length)
   throws IOException {
     return ((Buffer)in).read(position, buffer, offset, length);
@@ -349,4 +139,8 @@ public class FSDataInputStream extends DataInputStream
   throws IOException {
     ((Buffer)in).readFully(position, buffer, 0, buffer.length);
   }
+  
+  public boolean seekToNewSource(long targetPos) throws IOException {
+    return inStream.seekToNewSource(targetPos); 
+  }
 }
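
With the Checker gone, FSDataInputStream is pure plumbing: a DataInputStream over Buffer over PositionCache over a raw FSInputStream, and seeks no longer have to land on checksum boundaries. A sketch of the layering, assuming `raw` is some FSInputStream:

    FSDataInputStream in = new FSDataInputStream(raw, 4096);
    in.seek(1024);            // delegates straight to the underlying stream
    long pos = in.getPos();   // 1024, answered from PositionCache/Buffer
    in.close();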

+ 7 - 114
src/java/org/apache/hadoop/fs/FSDataOutputStream.java

@@ -18,89 +18,13 @@
 package org.apache.hadoop.fs;
 
 import java.io.*;
-import java.util.zip.Checksum;
-import java.util.zip.CRC32;
+
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.Progressable;
 
-/** Utility that wraps a {@link FSOutputStream} in a {@link DataOutputStream},
+/** Utility that wraps an {@link OutputStream} in a {@link DataOutputStream},
  * buffers output through a {@link BufferedOutputStream} and creates a checksum
  * file. */
 public class FSDataOutputStream extends DataOutputStream {
-  public static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
-  
-  /** Store checksums for data. */
-  private static class Summer extends FilterOutputStream {
-
-    private FSDataOutputStream sums;
-    private Checksum sum = new CRC32();
-    private int inSum;
-    private int bytesPerSum;
-
-    public Summer(FileSystem fs, 
-                  Path file, 
-                  boolean overwrite, 
-                  short replication,
-                  long blockSize,
-                  Configuration conf)
-      throws IOException {
-      this(fs, file, overwrite, replication, blockSize, conf, null);
-    }
-
-    public Summer(FileSystem fs, 
-                  Path file, 
-                  boolean overwrite, 
-                  short replication,
-                  long blockSize,
-                  Configuration conf,
-                  Progressable progress)
-      throws IOException {
-      super(fs.createRaw(file, overwrite, replication, blockSize, progress));
-      this.bytesPerSum = conf.getInt("io.bytes.per.checksum", 512);
-      this.sums = new FSDataOutputStream(
-            fs.createRaw(FileSystem.getChecksumFile(file), true, 
-                         replication, blockSize), 
-            conf);
-      sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
-      sums.writeInt(this.bytesPerSum);
-    }
-    
-    public void write(byte b[], int off, int len) throws IOException {
-      int summed = 0;
-      while (summed < len) {
-
-        int goal = this.bytesPerSum - inSum;
-        int inBuf = len - summed;
-        int toSum = inBuf <= goal ? inBuf : goal;
-
-        sum.update(b, off+summed, toSum);
-        summed += toSum;
-
-        inSum += toSum;
-        if (inSum == this.bytesPerSum) {
-          writeSum();
-        }
-      }
-
-      out.write(b, off, len);
-    }
-
-    private void writeSum() throws IOException {
-      if (inSum != 0) {
-        sums.writeInt((int)sum.getValue());
-        sum.reset();
-        inSum = 0;
-      }
-    }
-
-    public void close() throws IOException {
-      writeSum();
-      sums.close();
-      super.close();
-    }
-
-  }
-
   private static class PositionCache extends FilterOutputStream {
     long position;
 
@@ -122,7 +46,7 @@ public class FSDataOutputStream extends DataOutputStream {
   }
 
   private static class Buffer extends BufferedOutputStream {
-    public Buffer(OutputStream out, int bufferSize) throws IOException {
+    public Buffer(PositionCache out, int bufferSize) throws IOException {
       super(out, bufferSize);
     }
 
@@ -138,50 +62,19 @@ public class FSDataOutputStream extends DataOutputStream {
         buf[count++] = (byte)b;
       }
     }
-
-  }
-
-  public FSDataOutputStream(FileSystem fs, Path file,
-                            boolean overwrite, Configuration conf,
-                            int bufferSize, short replication, long blockSize )
-  throws IOException {
-    super(new Buffer(
-            new PositionCache(
-                new Summer(fs, file, overwrite, replication, blockSize, conf)), 
-            bufferSize));
   }
 
-  public FSDataOutputStream(FileSystem fs, Path file,
-                            boolean overwrite, Configuration conf,
-                            int bufferSize, short replication, long blockSize,
-                            Progressable progress)
+  public FSDataOutputStream(OutputStream out, int bufferSize)
   throws IOException {
-    super(new Buffer(
-            new PositionCache(
-                new Summer(fs, file, overwrite, replication, blockSize, conf, progress)), 
-            bufferSize));
+    super(new Buffer(new PositionCache(out), bufferSize));
   }
   
-  /** Construct without checksums. */
-  private FSDataOutputStream(FSOutputStream out, Configuration conf) throws IOException {
+  public FSDataOutputStream(OutputStream out, Configuration conf)
+  throws IOException {
     this(out, conf.getInt("io.file.buffer.size", 4096));
   }
 
-  /** Construct without checksums. */
-  private FSDataOutputStream(FSOutputStream out, int bufferSize)
-    throws IOException {
-    super(new Buffer(new PositionCache(out), bufferSize));
-  }
-
   public long getPos() throws IOException {
     return ((Buffer)out).getPos();
   }
-
-  public static long getChecksumLength(long size, int bytesPerSum) {
-    //the checksum length is equal to size passed divided by bytesPerSum +
-    //bytes written in the beginning of the checksum file.  
-    return ((long)(Math.ceil((float)size/bytesPerSum)) + 1) * 4 + 
-            CHECKSUM_VERSION.length;  
-  }
-  
 }
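
For reference, the checksum-length arithmetic removed above is not lost: it moves, with the rest of the checksum logic, into the new ChecksumFileSystem. A minimal sketch of the same computation, with illustrative names (only io.bytes.per.checksum and the four-byte CRC entries come from the code above):

  // Sketch: size of a ".crc" file for a data file of 'size' bytes.
  // One four-byte CRC is stored per bytesPerSum data bytes, plus one
  // int recording bytesPerSum itself, after the version header.
  public static long checksumLength(long size, int bytesPerSum, int headerLen) {
    long sums = (size + bytesPerSum - 1) / bytesPerSum;  // ceil(size / bytesPerSum)
    return (sums + 1) * 4 + headerLen;
  }

With the default io.bytes.per.checksum of 512, a 1 MB file stores 2048 CRCs, so its checksum file occupies (2048 + 1) * 4 = 8196 bytes plus the version header.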

+ 0 - 32
src/java/org/apache/hadoop/fs/FSOutputStream.java

@@ -1,32 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.fs;
-
-import java.io.*;
-
-/****************************************************************
- * FSOutputStream is an OutputStream that can track its position.
- *
- * @author Mike Cafarella
- *****************************************************************/
-public abstract class FSOutputStream extends OutputStream {
-    /**
-     * Return the current offset from the start of the file
-     */
-    public abstract long getPos() throws IOException;
-}
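
With FSOutputStream deleted, position tracking is no longer a contract every stream class must implement; FSDataOutputStream's PositionCache wrapper counts bytes as they pass through any plain OutputStream. A rough sketch of that idea, with illustrative names (not the patch's API):

  import java.io.FilterOutputStream;
  import java.io.IOException;
  import java.io.OutputStream;

  // Sketch: track the write offset of an arbitrary OutputStream by
  // counting bytes, in the spirit of the PositionCache shown above.
  class CountingOutputStream extends FilterOutputStream {
    private long position;
    CountingOutputStream(OutputStream out) { super(out); }
    public void write(int b) throws IOException {
      out.write(b);
      position++;                        // count only successful writes
    }
    public void write(byte[] b, int off, int len) throws IOException {
      out.write(b, off, len);
      position += len;
    }
    long getPos() { return position; }   // what FSOutputStream.getPos() provided
  }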

+ 157 - 250
src/java/org/apache/hadoop/fs/FileSystem.java

@@ -48,7 +48,7 @@ import org.apache.hadoop.util.*;
  * @author Mike Cafarella
  *****************************************************************/
 public abstract class FileSystem extends Configured {
-    public static final Log LOG = LogFactory.getLog("org.apache.hadoop.dfs.DistributedFileSystem");
+    public static final Log LOG = LogFactory.getLog("org.apache.hadoop.fs.FileSystem");
 
     // cache indexed by URI scheme and authority
     private static final Map<String,Map<String,FileSystem>> CACHE
@@ -194,25 +194,7 @@ public abstract class FileSystem extends Configured {
 
       return new Path(scheme+":"+"//"+authority + pathUri.getPath());
     }
-
-    /** Return the name of the checksum file associated with a file.*/
-    public static Path getChecksumFile(Path file) {
-      return new Path(file.getParent(), "."+file.getName()+".crc");
-    }
-
-    /** Return the length of the checksum file given the size of the 
-     * actual file.
-     **/
-    public static long getChecksumFileLength(long fileSize, int bytesPerSum) {
-      return FSDataOutputStream.getChecksumLength(fileSize, bytesPerSum);
-    }
     
-    /** Return true iff file is a checksum file name.*/
-    public static boolean isChecksumFile(Path file) {
-      String name = file.getName();
-      return name.startsWith(".") && name.endsWith(".crc");
-    }
-
     ///////////////////////////////////////////////////////////////
     // FileSystem
     ///////////////////////////////////////////////////////////////
@@ -252,24 +234,17 @@ public abstract class FileSystem extends Configured {
      * @param f the file name to open
      * @param bufferSize the size of the buffer to be used.
      */
-    public FSDataInputStream open(Path f, int bufferSize) throws IOException {
-      return new FSDataInputStream(this, f, bufferSize, getConf());
-    }
+    public abstract FSDataInputStream open(Path f, int bufferSize)
+    throws IOException;
     
     /**
      * Opens an FSDataInputStream at the indicated Path.
      * @param f the file to open
      */
     public FSDataInputStream open(Path f) throws IOException {
-      return new FSDataInputStream(this, f, getConf());
+      return open(f, getConf().getInt("io.file.buffer.size", 4096));
     }
 
-    /**
-     * Opens an InputStream for the indicated Path, whether local
-     * or via DFS.
-     */
-    public abstract FSInputStream openRaw(Path f) throws IOException;
-
     /**
      * Opens an FSDataOutputStream at the indicated Path.
      * Files are overwritten by default.
@@ -368,8 +343,7 @@ public abstract class FileSystem extends Configured {
                                       short replication,
                                       long blockSize
                                     ) throws IOException {
-      return new FSDataOutputStream(this, f, overwrite, getConf(), 
-                                    bufferSize, replication, blockSize );
+      return create(f, overwrite, bufferSize, replication, blockSize, null);
     }
 
     /**
@@ -381,72 +355,25 @@ public abstract class FileSystem extends Configured {
      * @param bufferSize the size of the buffer to be used.
      * @param replication required block replication for the file. 
      */
-    public FSDataOutputStream create( Path f, 
-                                      boolean overwrite,
-                                      int bufferSize,
-                                      short replication,
-                                      long blockSize,
-                                      Progressable progress
-                                    ) throws IOException {
-      return new FSDataOutputStream(this, f, overwrite, getConf(), 
-                                    bufferSize, replication, blockSize, progress );
-    }
-
-    /** Opens an OutputStream at the indicated Path.
-     * @param f the file name to open
-     * @param overwrite if a file with this name already exists, then if true,
-     *   the file will be overwritten, and if false an error will be thrown.
-     * @param replication required block replication for the file. 
-     */
-    public abstract FSOutputStream createRaw(Path f, boolean overwrite, 
-                                             short replication,
-                                             long blockSize)
-      throws IOException;
+    public abstract FSDataOutputStream create( Path f, 
+                                               boolean overwrite,
+                                               int bufferSize,
+                                               short replication,
+                                               long blockSize,
+                                               Progressable progress
+                                             ) throws IOException;
 
-    /** Opens an OutputStream at the indicated Path with write-progress
-     * reporting.
-     * @param f the file name to open
-     * @param overwrite if a file with this name already exists, then if true,
-     *   the file will be overwritten, and if false an error will be thrown.
-     * @param replication required block replication for the file. 
-     */
-    public abstract FSOutputStream createRaw(Path f, boolean overwrite, 
-                                             short replication,
-                                             long blockSize, Progressable progress)
-      throws IOException;
-    
     /**
      * Creates the given Path as a brand-new zero-length file.  If
      * create fails, or if it already existed, return false.
      */
     public boolean createNewFile(Path f) throws IOException {
-        if (exists(f)) {
-            return false;
-        } else {
-          create(f,false,getConf().getInt("io.file.buffer.size", 4096)).close();
-          return true;
-        }
-    }
-
-    /**
-     * Set replication for an existing file.
-     * 
-     * @param src file name
-     * @param replication new replication
-     * @throws IOException
-     * @return true if successful;
-     *         false if file does not exist or is a directory
-     */
-    public boolean setReplication(Path src, short replication) throws IOException {
-      boolean value = setReplicationRaw(src, replication);
-      if( ! value )
+      if (exists(f)) {
         return false;
-
-      Path checkFile = getChecksumFile(src);
-      if (exists(checkFile))
-        setReplicationRaw(checkFile, replication);
-
-      return true;
+      } else {
+        create(f, false, getConf().getInt("io.file.buffer.size", 4096)).close();
+        return true;
+      }
     }
 
     /**
@@ -467,57 +394,20 @@ public abstract class FileSystem extends Configured {
      * @return true if successful;
      *         false if file does not exist or is a directory
      */
-    public abstract boolean setReplicationRaw(Path src, short replication) throws IOException;
-
-    /**
-     * Renames Path src to Path dst.  Can take place on local fs
-     * or remote DFS.
-     */
-    public boolean rename(Path src, Path dst) throws IOException {
-      if (isDirectory(src)) {
-        return renameRaw(src, dst);
-      } else {
-
-        boolean value = renameRaw(src, dst);
-        if (!value)
-          return false;
-
-        Path checkFile = getChecksumFile(src);
-        if (exists(checkFile)) { //try to rename checksum
-          if(isDirectory(dst)) {
-            renameRaw(checkFile, dst);
-          } else {
-            renameRaw(checkFile, getChecksumFile(dst)); 
-          }
-        }
-
-        return value;
-      }
-      
-    }
+    public abstract boolean setReplication(Path src, short replication) throws IOException;
 
     /**
      * Renames Path src to Path dst.  Can take place on local fs
      * or remote DFS.
      */
-    public abstract boolean renameRaw(Path src, Path dst) throws IOException;
-
-    /** Delete a file. */
-    public boolean delete(Path f) throws IOException {
-      if (isDirectory(f)) {
-        return deleteRaw(f);
-      } else {
-        deleteRaw(getChecksumFile(f));            // try to delete checksum
-        return deleteRaw(f);
-      }
-    }
-
-    /**
-     * Deletes Path
+    public abstract boolean rename(Path src, Path dst) throws IOException;
+    
+    /** Delete a file */
+    public abstract boolean delete(Path f) throws IOException;
+    
+    /** Check if exists.
+     * @param f source file
      */
-    public abstract boolean deleteRaw(Path f) throws IOException;
-
-    /** Check if exists. */
     public abstract boolean exists(Path f) throws IOException;
 
     /** True iff the named path is a directory. */
@@ -525,11 +415,11 @@ public abstract class FileSystem extends Configured {
 
     /** True iff the named path is a regular file. */
     public boolean isFile(Path f) throws IOException {
-        if (exists(f) && ! isDirectory(f)) {
-            return true;
-        } else {
-            return false;
-        }
+      if (exists(f) && ! isDirectory(f)) {
+        return true;
+      } else {
+        return false;
+      }
     }
     
     /** The number of bytes in a file. */
@@ -540,38 +430,43 @@ public abstract class FileSystem extends Configured {
      * If <i>f</i> is a directory, return the size of the directory tree
      */
     public long getContentLength(Path f) throws IOException {
-        if (!isDirectory(f)) {
-            // f is a file
-            return getLength(f);
-        }
-            
-        // f is a diretory
-        Path[] contents = listPathsRaw(f);
-        long size = 0;
-        for(int i=0; i<contents.length; i++) {
-            size += getContentLength(contents[i]);
-        }
-        return size;
+      if (!isDirectory(f)) {
+        // f is a file
+        return getLength(f);
+      }
+      
+      // f is a directory
+      Path[] contents = listPaths(f);
+      long size = 0;
+      for(int i=0; i<contents.length; i++) {
+        size += getContentLength(contents[i]);
+      }
+      return size;
     }
 
     final private static PathFilter DEFAULT_FILTER = new PathFilter() {
       public boolean accept(Path file) {
-        return !isChecksumFile(file);
+        return true;
       }     
     };
-  
-    /** List files in a directory. */
-    public Path[] listPaths(Path f) throws IOException {
-      return listPaths(f, DEFAULT_FILTER);
-    }
     
     /** List files in a directory. */
-    public abstract Path[] listPathsRaw(Path f) throws IOException;
+    public abstract Path[] listPaths(Path f) throws IOException;
+    
+    /** 
+     * Filter files in the given paths using the default filter.
+     * @param files a list of paths
+     * @return a list of files under the source paths
+     * @exception IOException
+     */
+    public Path[] listPaths(Path[] files ) throws IOException {
+      return listPaths(files, DEFAULT_FILTER);
+    }
 
-    /** Filter raw files in a directory. */
+    /** Filter files in a directory. */
     private void listPaths(ArrayList<Path> results, Path f, PathFilter filter)
       throws IOException {
-      Path listing[] = listPathsRaw(f);
+      Path listing[] = listPaths(f);
       if (listing != null) {
         for (int i = 0; i < listing.length; i++) {
           if (filter.accept(listing[i])) {
@@ -581,25 +476,15 @@ public abstract class FileSystem extends Configured {
       }      
     }
     
-    /** Filter raw files in a directory. */
+    /** Filter files in a directory. */
     public Path[] listPaths(Path f, PathFilter filter) throws IOException {
-        ArrayList<Path> results = new ArrayList<Path>();
-        listPaths(results, f, filter);
-        return (Path[]) results.toArray(new Path[results.size()]);
-    }
-
-    /** 
-     * Filter raw files in a list directories using the default checksum filter. 
-     * @param files a list of paths
-     * @return a list of files under the source paths
-     * @exception IOException
-     */
-    public Path[] listPaths(Path[] files ) throws IOException {
-      return listPaths( files, DEFAULT_FILTER );
+      ArrayList<Path> results = new ArrayList<Path>();
+      listPaths(results, f, filter);
+      return (Path[]) results.toArray(new Path[results.size()]);
     }
     
     /** 
-     * Filter raw files in a list directories using user-supplied path filter. 
+     * Filter files in a list of directories using a user-supplied path filter.
      * @param files a list of paths
      * @return a list of files under the source paths
      * @exception IOException
@@ -713,7 +598,7 @@ public abstract class FileSystem extends Configured {
       private boolean hasPattern = false;
       
       /** Default pattern character: Escape any special meaning. */
-      private static final char  PAT_ESCAPE =  '\\';
+      private static final char  PAT_ESCAPE = '\\';
       /** Default pattern character: Any single character. */
       private static final char  PAT_ANY = '.';
       /** Default pattern character: Character set close. */
@@ -740,60 +625,58 @@ public abstract class FileSystem extends Configured {
         // Validate the pattern
         len = filePattern.length();
         if (len == 0)
-            return;
+          return;
 
-        setOpen =  0;
+        setOpen = 0;
         setRange = false;
-
-        for (int i = 0;  i < len;  i++)
-        {
-            char  pCh;
-
-            // Examine a single pattern character
-            pCh = filePattern.charAt(i);            
-            if( pCh == PAT_ESCAPE ) {
-              fileRegex.append( pCh );
-              i++;
-              if (i >= len)
-                  error( "An escaped character does not present",
-                      filePattern, i);
-              pCh = filePattern.charAt(i);
-            } else if( pCh == '.' ) {
-              fileRegex.append( PAT_ESCAPE );
-            } else if( pCh == '*' ) {
-                fileRegex.append( PAT_ANY );
-                hasPattern = true;
-            } else if( pCh == '?' ) {
-                pCh = PAT_ANY ;
-                hasPattern = true;
-            } else if( pCh == '[' && setOpen == 0 ) {
-                setOpen++;
-                hasPattern = true;
-            } else if( pCh == '^' && setOpen > 0) {
-            } else if (pCh == '-'  &&  setOpen > 0) {
-                // Character set range
-                setRange = true;
-            } else if (pCh == PAT_SET_CLOSE  &&  setRange) {
-                // Incomplete character set range
-                error("Incomplete character set range", filePattern, i);
-            } else if (pCh == PAT_SET_CLOSE  &&  setOpen > 0) {
-                // End of a character set
-                if (setOpen < 2)
-                    error("Unexpected end of set", filePattern, i);
-                setOpen = 0;
-            } else if (setOpen > 0) {
-                // Normal character, or the end of a character set range
-                setOpen++;
-                setRange = false;
-            }
-            fileRegex.append( pCh );
+        
+        for (int i = 0; i < len; i++) {
+          char pCh;
+          
+          // Examine a single pattern character
+          pCh = filePattern.charAt(i);
+          if (pCh == PAT_ESCAPE) {
+            fileRegex.append(pCh);
+            i++;
+            if (i >= len)
+              error("An escaped character does not present", filePattern, i);
+            pCh = filePattern.charAt(i);
+          } else if (pCh == '.') {
+            fileRegex.append(PAT_ESCAPE);
+          } else if (pCh == '*') {
+            fileRegex.append(PAT_ANY);
+            hasPattern = true;
+          } else if (pCh == '?') {
+            pCh = PAT_ANY;
+            hasPattern = true;
+          } else if (pCh == '[' && setOpen == 0) {
+            setOpen++;
+            hasPattern = true;
+          } else if (pCh == '^' && setOpen > 0) {
+          } else if (pCh == '-' && setOpen > 0) {
+            // Character set range
+            setRange = true;
+          } else if (pCh == PAT_SET_CLOSE && setRange) {
+            // Incomplete character set range
+            error("Incomplete character set range", filePattern, i);
+          } else if (pCh == PAT_SET_CLOSE && setOpen > 0) {
+            // End of a character set
+            if (setOpen < 2)
+              error("Unexpected end of set", filePattern, i);
+            setOpen = 0;
+          } else if (setOpen > 0) {
+            // Normal character, or the end of a character set range
+            setOpen++;
+            setRange = false;
+          }
+          fileRegex.append(pCh);
         }
-
+        
         // Check for a well-formed pattern
-        if (setOpen > 0  ||  setRange)
-        {
-            // Incomplete character set or character range
-            error("Expecting set closure character or end of range", filePattern, len);
+        if (setOpen > 0 || setRange) {
+          // Incomplete character set or character range
+          error("Expecting set closure character or end of range", filePattern,
+              len);
         }
         regex = Pattern.compile(fileRegex.toString());
       }
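
Read concretely, the reformatted loop above is a glob-to-regex translator; PAT_ANY ('.') and PAT_ESCAPE ('\') are the constants defined earlier. A few hand-checked translations (patterns illustrative):

  part-*    becomes  part-.*   '*' is emitted as PAT_ANY plus the '*' itself
  log.?     becomes  log\..    '.' gains a PAT_ESCAPE; '?' is rewritten to PAT_ANY
  f[0-9]    becomes  f[0-9]    character sets pass through unchanged
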
@@ -808,13 +691,14 @@ public abstract class FileSystem extends Configured {
       
       private void error(String s, String pattern, int pos) throws IOException {
         throw new IOException("Illegal file pattern: "
-                                 +s+" for glob "+pattern + " at " + pos);
+                                 +s+ " for glob "+ pattern + " at " + pos);
       }
     }
     
     /**
-     * Set the current working directory for the given file system.
-     * All relative paths will be resolved relative to it.
+     * Set the current working directory for the given file system. All relative
+     * paths will be resolved relative to it.
+     * 
      * @param new_dir
      */
     public abstract void setWorkingDirectory(Path new_dir);
@@ -852,30 +736,52 @@ public abstract class FileSystem extends Configured {
      * The src file is on the local disk.  Add it to FS at
      * the given dst name and the source is kept intact afterwards
      */
-    public abstract void copyFromLocalFile(Path src, Path dst) throws IOException;
+    public void copyFromLocalFile(Path src, Path dst)
+    throws IOException {
+      copyFromLocalFile(false, src, dst);
+    }
 
     /**
      * The src file is on the local disk.  Add it to FS at
      * the given dst name, removing the source afterwards.
      */
-    public abstract void moveFromLocalFile(Path src, Path dst) throws IOException;
+    public void moveFromLocalFile(Path src, Path dst)
+    throws IOException {
+      copyFromLocalFile(true, src, dst);
+    }
 
+    /**
+     * The src file is on the local disk.  Add it to FS at
+     * the given dst name.
+     * delSrc indicates if the source should be removed
+     */
+    public abstract void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException;
+    
     /**
      * The src file is under FS, and the dst is on the local disk.
      * Copy it from FS control to the local dst name.
-     * If src and dst are directories, copy crc files as well.
      */
     public void copyToLocalFile(Path src, Path dst) throws IOException {
-      copyToLocalFile(src, dst, true);
+      copyToLocalFile(false, src, dst);
     }
     
     /**
      * The src file is under FS, and the dst is on the local disk.
      * Copy it from FS control to the local dst name.
-     * If src and dst are directories, the copyCrc parameter
-     * determines whether to copy CRC files.
+     * Remove the source afterwards
      */
-    public abstract void copyToLocalFile(Path src, Path dst, boolean copyCrc) throws IOException;
+    public void moveToLocalFile(Path src, Path dst) throws IOException {
+      copyToLocalFile(true, src, dst);
+    }
+
+    /**
+     * The src file is under FS, and the dst is on the local disk.
+     * Copy it from FS control to the local dst name.
+     * delSrc indicates if the src will be removed or not.
+     */   
+    public abstract void copyToLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException;
 
     /**
      * Returns a local File that the user can write output to.  The caller
@@ -907,20 +813,18 @@ public abstract class FileSystem extends Configured {
       }
     }
 
-    /**
-     * Report a checksum error to the file system.
-     * @param f the file name containing the error
-     * @param in the stream open on the file
-     * @param inPos the position of the beginning of the bad data in the file
-     * @param sums the stream open on the checksum file
-     * @param sumsPos the position of the beginning of the bad data in the checksum file
-     */
-    public abstract void reportChecksumFailure(Path f, 
-                                               FSInputStream in, long inPos, 
-                                               FSInputStream sums, long sumsPos);
+    /** Return the total size of all files in the filesystem. */
+    public long getUsed() throws IOException {
+      long used = 0;
+      Path[] files = listPaths(new Path("/"));
+      for (Path file : files) {
+        used += getContentLength(file);
+      }
+      return used;
+    }
 
     /**
-     * Get the size for a particular file.
+     * Get the block size for a particular file.
      * @param f the filename
      * @return the number of bytes in a block
      */
@@ -928,7 +832,10 @@ public abstract class FileSystem extends Configured {
     
    /** Return the number of bytes that large input files should optimally
     * be split into to minimize i/o time. */
-    public abstract long getDefaultBlockSize();
+    public long getDefaultBlockSize() {
+      // default to 32MB: large enough to minimize the impact of seeks
+      return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024);
+    }
     
     /**
      * Get the default replication.

+ 26 - 45
src/java/org/apache/hadoop/fs/FileUtil.java

@@ -67,55 +67,36 @@ public class FileUtil {
                              FileSystem dstFS, Path dst, 
                              boolean deleteSource,
                              Configuration conf ) throws IOException {
-    return copy(srcFS, src, dstFS, dst, deleteSource, true, conf);
-  
-  }
-
-  /** Copy files between FileSystems. */
-  public static boolean copy(FileSystem srcFS, Path src, 
-                             FileSystem dstFS, Path dst, 
-                             boolean deleteSource,
-                             boolean copyCrc,
-                             Configuration conf ) throws IOException {
-    dst = checkDest(src.getName(), dstFS, dst);
+      dst = checkDest(src.getName(), dstFS, dst);
 
-    if (srcFS.isDirectory(src)) {
-      if (!dstFS.mkdirs(dst)) {
-        return false;
-      }
-      Path contents[] = srcFS.listPaths(src);
-      for (int i = 0; i < contents.length; i++) {
-        copy(srcFS, contents[i], dstFS, new Path(dst, contents[i].getName()),
-             deleteSource, copyCrc, conf);
-      }
-    } else if (srcFS.isFile(src)) {
-      InputStream in = srcFS.open(src);
-      try {
-        OutputStream out = (copyCrc) ?
-          dstFS.create(dst) :
-          dstFS.createRaw(dst, true, dstFS.getDefaultReplication(),
-            dstFS.getDefaultBlockSize());
-        copyContent(in, out, conf);
-      } finally {
-        in.close();
-      }
-      // if crc copying is disabled, remove the existing crc file if any
-      if (!copyCrc) {
-        Path crcFile = dstFS.getChecksumFile(dst);
-        if (dstFS.exists(crcFile)) {
-          dstFS.deleteRaw(crcFile);
+      if (srcFS.isDirectory(src)) {
+        if (!dstFS.mkdirs(dst)) {
+          return false;
+        }
+        Path contents[] = srcFS.listPaths(src);
+        for (int i = 0; i < contents.length; i++) {
+          copy(srcFS, contents[i], dstFS, new Path(dst, contents[i].getName()),
+               deleteSource, conf);
+        }
+      } else if (srcFS.isFile(src)) {
+        InputStream in = srcFS.open(src);
+        try {
+          OutputStream out = dstFS.create(dst);
+          copyContent(in, out, conf);
+        } finally {
+          in.close();
         }
+      } else {
+        throw new IOException(src.toString() + ": No such file or directory");
+      }
+      if (deleteSource) {
+        return srcFS.delete(src);
+      } else {
+        return true;
       }
-    } else {
-      throw new IOException(src.toString() + ": No such file or directory");
-    }
-    if (deleteSource) {
-      return srcFS.delete(src);
-    } else {
-      return true;
-    }
-  }
   
+  }
+
   /** Copy all files in a directory to one output file (merge). */
   public static boolean copyMerge(FileSystem srcFS, Path srcDir, 
                              FileSystem dstFS, Path dstFile, 
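
The simplified copy() above now has a single checksum-agnostic path: recurse for directories, stream bytes for files, then optionally delete the source. A usage sketch against the signature shown above (paths illustrative):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.*;

  public class CopySketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(conf);
      // 'false' keeps the source; passing true makes this a move.
      FileUtil.copy(fs, new Path("/data/in"),
                    fs, new Path("/data/backup"),
                    false, conf);
    }
  }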

+ 302 - 0
src/java/org/apache/hadoop/fs/FilterFileSystem.java

@@ -0,0 +1,302 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.*;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Progressable;
+
+/****************************************************************
+ * A <code>FilterFileSystem</code> contains
+ * some other file system, which it uses as
+ * its basic file system, possibly transforming
+ * the data along the way or providing additional
+ * functionality. The class <code>FilterFileSystem</code>
+ * itself simply overrides all methods of
+ * <code>FileSystem</code> with versions that
+ * pass all requests to the contained file
+ * system. Subclasses of <code>FilterFileSystem</code>
+ * may further override some of these methods
+ * and may also provide additional methods
+ * and fields.
+ *
+ * @author Hairong Kuang
+ *****************************************************************/
+public class FilterFileSystem extends FileSystem {
+  
+  protected FileSystem fs;
+  
+  public FilterFileSystem(FileSystem fs) {
+    this.fs = fs;
+  }
+
+  /** Called after a new FileSystem instance is constructed.
+   * @param name a uri whose authority section names the host, port, etc.
+   *   for this FileSystem
+   * @param conf the configuration
+   */
+  public void initialize(URI name, Configuration conf) throws IOException {
+    fs.initialize(name, conf);
+  }
+
+  /** Returns a URI whose scheme and authority identify this FileSystem.*/
+  public URI getUri() {
+    return fs.getUri();
+  }
+
+  /** @deprecated call #getUri() instead.*/
+  public String getName() {
+    return fs.getName();
+  }
+
+  /** Make sure that a path specifies a FileSystem. */
+  public Path makeQualified(Path path) {
+    return fs.makeQualified(path);
+  }
+  
+  ///////////////////////////////////////////////////////////////
+  // FileSystem
+  ///////////////////////////////////////////////////////////////
+
+  /** Check that a Path belongs to this FileSystem. */
+  protected void checkPath(Path path) {
+    fs.checkPath(path);
+  }
+
+  /**
+   * Return a 2D array of size 1x1 or greater, containing hostnames 
+   * where portions of the given file can be found.  For a nonexistent
+   * file or region, null will be returned.
+   *
+   * This call is most helpful with DFS, where it returns
+   * hostnames of machines that contain the given file.
+   *
+   * The FileSystem will simply return an element containing 'localhost'.
+   */
+  public String[][] getFileCacheHints(Path f, long start, long len)
+  throws IOException {
+    return fs.getFileCacheHints(f, start, len);
+  }
+
+  /**
+   * Opens an FSDataInputStream at the indicated Path.
+   * @param f the file name to open
+   * @param bufferSize the size of the buffer to be used.
+   */
+  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+    return fs.open(f, bufferSize);
+  }
+  
+  /**
+   * Opens an FSDataOutputStream at the indicated Path with write-progress
+   * reporting.
+   * @param f the file name to open
+   * @param overwrite if a file with this name already exists, then if true,
+   *   the file will be overwritten, and if false an error will be thrown.
+   * @param bufferSize the size of the buffer to be used.
+   * @param replication required block replication for the file. 
+   */
+  public FSDataOutputStream create( Path f, 
+                                    boolean overwrite,
+                                    int bufferSize,
+                                    short replication,
+                                    long blockSize,
+                                    Progressable progress
+                                   ) throws IOException {
+    return fs.create(f, overwrite, bufferSize, replication, blockSize, progress);
+  }
+
+  /**
+   * Get replication.
+   * 
+   * @param src file name
+   * @return file replication
+   * @throws IOException
+   */
+  public short getReplication(Path src) throws IOException {
+    return fs.getReplication(src);
+  }
+
+  /**
+   * Set replication for an existing file.
+   * 
+   * @param src file name
+   * @param replication new replication
+   * @throws IOException
+   * @return true if successful;
+   *         false if file does not exist or is a directory
+   */
+  public boolean setReplication(Path src, short replication) throws IOException {
+    return fs.setReplication(src, replication);
+  }
+  
+  /**
+   * Renames Path src to Path dst.  Can take place on local fs
+   * or remote DFS.
+   */
+  public boolean rename(Path src, Path dst) throws IOException {
+    return fs.rename(src, dst);
+  }
+  
+  /** Delete a file */
+  public boolean delete(Path f) throws IOException {
+    return fs.delete(f);
+  }
+  
+  /** Check if exists.
+   * @param f source file
+   */
+  public boolean exists(Path f) throws IOException {
+    return fs.exists(f);
+  }
+
+  /** True iff the named path is a directory. */
+  public boolean isDirectory(Path f) throws IOException {
+    return fs.isDirectory(f);
+  }
+
+  /** The number of bytes in a file. */
+  public long getLength(Path f) throws IOException {
+    return fs.getLength(f);
+  }
+  
+  /** List files in a directory. */
+  public Path[] listPaths(Path f) throws IOException {
+    return fs.listPaths(f);
+  }
+  
+  /**
+   * Set the current working directory for the given file system. All relative
+   * paths will be resolved relative to it.
+   * 
+   * @param newDir
+   */
+  public void setWorkingDirectory(Path newDir) {
+    fs.setWorkingDirectory(newDir);
+  }
+  
+  /**
+   * Get the current working directory for the given file system
+   * 
+   * @return the directory pathname
+   */
+  public Path getWorkingDirectory() {
+    return fs.getWorkingDirectory();
+  }
+  
+  /**
+   * Make the given file and all non-existent parents into directories. Has
+   * the semantics of Unix 'mkdir -p'. Existence of the directory hierarchy is
+   * not an error.
+   */
+  public boolean mkdirs(Path f) throws IOException {
+    return fs.mkdirs(f);
+  }
+
+  /**
+   * Obtain a lock on the given Path
+   * 
+   * @deprecated FS does not support file locks anymore.
+   */
+  @Deprecated
+  public void lock(Path f, boolean shared) throws IOException {
+    fs.lock(f, shared);
+  }
+
+  /**
+   * Release the lock
+   * 
+   * @deprecated FS does not support file locks anymore.     
+   */
+  @Deprecated
+  public void release(Path f) throws IOException {
+    fs.release(f);
+  }
+
+  /**
+   * The src file is on the local disk.  Add it to FS at
+   * the given dst name.
+   * delSrc indicates if the source should be removed
+   */
+  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+  throws IOException {
+    fs.copyFromLocalFile(delSrc, src, dst);
+  }
+  
+  /**
+   * The src file is under FS, and the dst is on the local disk.
+   * Copy it from FS control to the local dst name.
+   * delSrc indicates if the src will be removed or not.
+   */   
+  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+  throws IOException {
+    fs.copyToLocalFile(delSrc, src, dst);
+  }
+  
+  /**
+   * Returns a local File that the user can write output to.  The caller
+   * provides both the eventual FS target name and the local working
+   * file.  If the FS is local, we write directly into the target.  If
+   * the FS is remote, we write into the tmp local area.
+   */
+  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+  throws IOException {
+    return fs.startLocalOutput(fsOutputFile, tmpLocalFile);
+  }
+
+  /**
+   * Called when we're all done writing to the target.  A local FS will
+   * do nothing, because we've written to exactly the right place.  A remote
+   * FS will copy the contents of tmpLocalFile to the correct target at
+   * fsOutputFile.
+   */
+  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+  throws IOException {
+    fs.completeLocalOutput(fsOutputFile, tmpLocalFile);
+  }
+
+  /**
+   * Get the block size for a particular file.
+   * @param f the filename
+   * @return the number of bytes in a block
+   */
+  public long getBlockSize(Path f) throws IOException {
+    return fs.getBlockSize(f);
+  }
+  
+  /** Return the number of bytes that large input files should optimally
+   * be split into to minimize i/o time. */
+  public long getDefaultBlockSize() {
+    return fs.getDefaultBlockSize();
+  }
+  
+  /**
+   * Get the default replication.
+   */
+  public short getDefaultReplication() {
+    return fs.getDefaultReplication();
+  }
+
+  @Override
+  public Configuration getConf() {
+    return fs.getConf();
+  }
+}
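
Since FilterFileSystem is a plain decorator, a subclass overrides only what it changes and inherits delegation for everything else; the new ChecksumFileSystem in this patch is built exactly this way. A minimal hypothetical subclass for illustration (not part of the patch):

  import java.io.IOException;
  import org.apache.hadoop.fs.*;

  // Sketch: count open() calls, delegating all other operations to 'fs'.
  public class CountingOpenFileSystem extends FilterFileSystem {
    private long opens = 0;
    public CountingOpenFileSystem(FileSystem fs) { super(fs); }
    public FSDataInputStream open(Path f, int bufferSize) throws IOException {
      opens++;                        // observe the call, then delegate
      return fs.open(f, bufferSize);
    }
    public long getOpenCount() { return opens; }
  }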

+ 3 - 2
src/java/org/apache/hadoop/fs/FsShell.java

@@ -21,6 +21,7 @@ import java.io.*;
 import java.text.*;
 
 import org.apache.hadoop.conf.*;
+import org.apache.hadoop.dfs.DistributedFileSystem;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.util.ToolBase;
 
@@ -142,7 +143,7 @@ public class FsShell extends ToolBase {
         }
         Path dst = new Path( dstf );
         for( int i=0; i<srcs.length; i++ ) {
-          fs.copyToLocalFile( srcs[i], dst, copyCrc );
+          ((DistributedFileSystem)fs).copyToLocalFile(srcs[i], dst, copyCrc);
         }
       }
     }
@@ -497,7 +498,7 @@ public class FsShell extends ToolBase {
             + "destination should be a directory." );
       }
       for( int i=0; i<srcs.length; i++ ) {
-        FileUtil.copy(fs, srcs[i], fs, dst, false, true, conf);
+        FileUtil.copy(fs, srcs[i], fs, dst, false, conf);
       }
     }
 

+ 123 - 68
src/java/org/apache/hadoop/fs/InMemoryFileSystem.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.fs;
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.net.URI;
 import java.util.*;
 
@@ -35,12 +36,12 @@ import org.apache.hadoop.util.Progressable;
  * @author ddas
  *
  */
-public class InMemoryFileSystem extends FileSystem {
+public class InMemoryFileSystem extends ChecksumFileSystem {
+  private static class RawInMemoryFileSystem extends FileSystem {
   private URI uri;
   private int fsSize;
   private volatile int totalUsed;
   private Path staticWorkingDir;
-  private int bytesPerSum;
   
   //pathToFileAttribs is the final place where a file is put after it is closed
   private Map <String, FileAttributes> pathToFileAttribs = 
@@ -53,19 +54,21 @@ public class InMemoryFileSystem extends FileSystem {
   private Map <String, FileAttributes> tempFileAttribs = 
     Collections.synchronizedMap(new HashMap());
   
-  public InMemoryFileSystem() {}
-  
-  public InMemoryFileSystem(URI uri, Configuration conf) {
+  public RawInMemoryFileSystem() {
+    setConf(new Configuration());
+  }
+
+  public RawInMemoryFileSystem(URI uri, Configuration conf) {
     initialize(uri, conf);
   }
   
   //inherit javadoc
   public void initialize(URI uri, Configuration conf) {
+    setConf(conf);
     int size = Integer.parseInt(conf.get("fs.inmemory.size.mb", "100"));
     this.fsSize = size * 1024 * 1024;
     this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
     this.staticWorkingDir = new Path(this.uri.getPath());
-    this.bytesPerSum = conf.getInt("io.bytes.per.checksum", 512);
     LOG.info("Initialized InMemoryFileSystem: " + uri.toString() + 
              " of size (in bytes): " + fsSize);
   }
@@ -98,7 +101,7 @@ public class InMemoryFileSystem extends FileSystem {
     private FileAttributes fAttr;
     
     public InMemoryInputStream(Path f) throws IOException {
-      synchronized (InMemoryFileSystem.this) {
+      synchronized (RawInMemoryFileSystem.this) {
         fAttr = pathToFileAttribs.get(getPath(f));
         if (fAttr == null) { 
           throw new FileNotFoundException("File " + f + " does not exist");
@@ -137,11 +140,11 @@ public class InMemoryFileSystem extends FileSystem {
     public long skip(long n) throws IOException { return din.skip(n); }
   }
 
-  public FSInputStream openRaw(Path f) throws IOException {
-    return new InMemoryInputStream(f);
+  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+    return new FSDataInputStream(new InMemoryInputStream(f), bufferSize);
   }
 
-  private class InMemoryOutputStream extends FSOutputStream {
+  private class InMemoryOutputStream extends OutputStream {
     private int count;
     private FileAttributes fAttr;
     private Path f;
@@ -157,7 +160,7 @@ public class InMemoryFileSystem extends FileSystem {
     }
     
     public void close() throws IOException {
-      synchronized (InMemoryFileSystem.this) {
+      synchronized (RawInMemoryFileSystem.this) {
         pathToFileAttribs.put(getPath(f), fAttr);
       }
     }
@@ -187,32 +190,28 @@ public class InMemoryFileSystem extends FileSystem {
     }
   }
   
-  public FSOutputStream createRaw(Path f, boolean overwrite, short replication,
-      long blockSize) throws IOException {
+  public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize,
+      short replication, long blockSize, Progressable progress)
+      throws IOException {
     if (exists(f) && ! overwrite) {
       throw new IOException("File already exists:"+f);
     }
     synchronized (this) {
-      FileAttributes fAttr =(FileAttributes)tempFileAttribs.remove(getPath(f));
+      FileAttributes fAttr = (FileAttributes) tempFileAttribs.remove(getPath(f));
       if (fAttr != null)
-        return createRaw(f, fAttr);
+        return create(f, fAttr);
       return null;
     }
   }
-
-  public FSOutputStream createRaw(Path f, boolean overwrite, short replication,
-      long blockSize, Progressable progress) throws IOException {
-    //ignore write-progress reporter for in-mem files
-    return createRaw(f, overwrite, replication, blockSize);
-  }
-
-  public FSOutputStream createRaw(Path f, FileAttributes fAttr) 
-  throws IOException {
-    //the path is not added into the filesystem (in the pathToFileAttribs
-    //map) until close is called on the outputstream that this method is 
-    //going to return
-    //Create an output stream out of data byte array
-    return new InMemoryOutputStream(f, fAttr);
+  
+  public FSDataOutputStream create(Path f, FileAttributes fAttr)
+      throws IOException {
+    // the path is not added into the filesystem (in the pathToFileAttribs
+    // map) until close is called on the outputstream that this method is
+    // going to return
+    // Create an output stream out of data byte array
+    return new FSDataOutputStream(new InMemoryOutputStream(f, fAttr),
+        getConf());
   }
 
   public void close() throws IOException {
@@ -236,12 +235,12 @@ public class InMemoryFileSystem extends FileSystem {
     return 1;
   }
 
-  public boolean setReplicationRaw(Path src, short replication)
+  public boolean setReplication(Path src, short replication)
       throws IOException {
     return true;
   }
 
-  public boolean renameRaw(Path src, Path dst) throws IOException {
+  public boolean rename(Path src, Path dst) throws IOException {
     synchronized (this) {
       if (exists(dst)) {
         throw new IOException ("Path " + dst + " already exists");
@@ -253,7 +252,7 @@ public class InMemoryFileSystem extends FileSystem {
     }
   }
 
-  public boolean deleteRaw(Path f) throws IOException {
+  public boolean delete(Path f) throws IOException {
     synchronized (this) {
       FileAttributes fAttr = pathToFileAttribs.remove(getPath(f));
       if (fAttr != null) {
@@ -275,7 +274,11 @@ public class InMemoryFileSystem extends FileSystem {
    * Directory operations are not supported
    */
   public boolean isDirectory(Path f) throws IOException {
-    return false;
+    return !isFile(f);
+  }
+
+  public boolean isFile(Path f) throws IOException {
+    return exists(f);
   }
 
   public long getLength(Path f) throws IOException {
@@ -287,15 +290,20 @@ public class InMemoryFileSystem extends FileSystem {
   /**
    * Directory operations are not supported
    */
-  public Path[] listPathsRaw(Path f) throws IOException {
+  public Path[] listPaths(Path f) throws IOException {
     return null;
   }
-  public void setWorkingDirectory(Path new_dir) {}
+
+  public void setWorkingDirectory(Path new_dir) {
+    staticWorkingDir = new_dir;
+  }
+  
   public Path getWorkingDirectory() {
     return staticWorkingDir;
   }
+
   public boolean mkdirs(Path f) throws IOException {
-    return false;
+    return true;
   }
   
   /** lock operations are not supported */
@@ -303,10 +311,13 @@ public class InMemoryFileSystem extends FileSystem {
   public void release(Path f) throws IOException {}
   
   /** copy/move operations are not supported */
-  public void copyFromLocalFile(Path src, Path dst) throws IOException {}
-  public void moveFromLocalFile(Path src, Path dst) throws IOException {}
-  public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
-  throws IOException {}
+  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+      throws IOException {
+  }
+
+  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+      throws IOException {
+  }
 
   public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
       throws IOException {
@@ -317,11 +328,6 @@ public class InMemoryFileSystem extends FileSystem {
       throws IOException {
   }
 
-  public void reportChecksumFailure(Path p, FSInputStream in,
-      long inPos,
-      FSInputStream sums, long sumsPos) {
-  }
-
   public long getBlockSize(Path f) throws IOException {
     return getDefaultBlockSize();
   }
@@ -336,47 +342,30 @@ public class InMemoryFileSystem extends FileSystem {
   
   /** Some APIs exclusively for InMemoryFileSystem */
   
-  /** Register a path with its size. This will also register a checksum for 
-   * the file that the user is trying to create. This is required since none
-   * of the FileSystem APIs accept the size of the file as argument. But since
-   * it is required for us to apriori know the size of the file we are going to
-   * create, the user must call this method for each file he wants to create
-   * and reserve memory for that file. We either succeed in reserving memory
-   * for both the main file and the checksum file and return true, or return 
-   * false.
-   */
-  public boolean reserveSpaceWithCheckSum(Path f, int size) {
-    //get the size of the checksum file (we know it is going to be 'int'
-    //since this is an inmem fs with file sizes that will fit in 4 bytes)
-    int checksumSize = getChecksumFileLength(size);
+  /** Register a path with its size. */
+  public boolean reserveSpace(Path f, int size) {
     synchronized (this) {
-      if (!canFitInMemory(size + checksumSize)) return false;
+      if (!canFitInMemory(size))
+        return false;
       FileAttributes fileAttr;
-      FileAttributes checksumAttr;
       try {
         fileAttr = new FileAttributes(size);
-        checksumAttr = new FileAttributes(checksumSize);
       } catch (OutOfMemoryError o) {
         return false;
       }
-      totalUsed += size + checksumSize;
+      totalUsed += size;
       tempFileAttribs.put(getPath(f), fileAttr);
-      tempFileAttribs.put(getPath(FileSystem.getChecksumFile(f)),checksumAttr); 
       return true;
     }
   }
   
-  public int getChecksumFileLength(int size) {
-    return (int)super.getChecksumFileLength(size, bytesPerSum);
-  }
-  
    /** This API, getFiles, could have been implemented over listPaths,
    * but it is an overhead to maintain directory structures for this impl of
    * the in-memory fs.
    */
   public Path[] getFiles(PathFilter filter) {
     synchronized (this) {
-      List <String> closedFilesList = new ArrayList();
+      List<String> closedFilesList = new ArrayList<String>();
       synchronized (pathToFileAttribs) {
         Set paths = pathToFileAttribs.keySet();
         if (paths == null || paths.isEmpty()) {
@@ -433,4 +422,70 @@ public class InMemoryFileSystem extends FileSystem {
       this.data = new byte[size];
     }
   }
+  }
+
+  public InMemoryFileSystem() {
+    super(new RawInMemoryFileSystem());
+  }
+
+  public InMemoryFileSystem(URI uri, Configuration conf) {
+    super(new RawInMemoryFileSystem(uri, conf));
+  }
+
+  /** copy/move operations are not supported */
+  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+  throws IOException {}
+  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+  throws IOException {}
+
+  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+  throws IOException {
+    return fsOutputFile;
+  }
+
+  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+  throws IOException {
+  }
+
+  public void reportChecksumFailure(Path p, FSDataInputStream in,
+      long inPos,
+      FSDataInputStream sums, long sumsPos) {
+  }
+
+  /**
+   * Register a file with its size. This will also register a checksum for the
+   * file that the user is trying to create. This is required since none of
+   * the FileSystem APIs accept the size of the file as argument. But since it
+   * is required for us to know the size of the file a priori, the user must
+   * call this method for each file they want to create and reserve memory for
+   * that file. We either succeed in reserving memory for both the main file
+   * and the checksum file and return true, or return false.
+   */
+  public boolean reserveSpaceWithCheckSum(Path f, int size) {
+    // get the size of the checksum file (we know it is going to be 'int'
+    // since this is an inmem fs with file sizes that will fit in 4 bytes)
+    long checksumSize = getChecksumFileLength(f, size);
+    RawInMemoryFileSystem mfs = (RawInMemoryFileSystem)getRawFileSystem();
+    synchronized(mfs) {
+      return mfs.reserveSpace(f, size) &&
+        mfs.reserveSpace(getChecksumFile(f), (int)checksumSize);
+    }
+  }
+
+  public Path[] getFiles(PathFilter filter) {
+    return ((RawInMemoryFileSystem)getRawFileSystem()).getFiles(filter);
+  }
+
+  public int getNumFiles(PathFilter filter) {
+    return ((RawInMemoryFileSystem)getRawFileSystem()).getNumFiles(filter);
+  }
+
+  public int getFSSize() {
+    return ((RawInMemoryFileSystem)getRawFileSystem()).getFSSize();
+  }
+
+  public float getPercentUsed() {
+    return ((RawInMemoryFileSystem)getRawFileSystem()).getPercentUsed();
+  }
 }
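
Per the javadoc above, callers must reserve space, including the checksum file's share, before creating a file, since the raw create() returns null when nothing was reserved. A usage sketch (the scheme, path, and size here are illustrative):

  import java.net.URI;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.*;

  public class RamFsSketch {
    public static void main(String[] args) throws Exception {
      InMemoryFileSystem ramFs = new InMemoryFileSystem(
          URI.create("ramfs://test"), new Configuration());
      Path p = new Path("/spill-0.out");
      // Reserves room for both the data file and its .crc companion.
      if (ramFs.reserveSpaceWithCheckSum(p, 64 * 1024)) {
        FSDataOutputStream out = ramFs.create(p);
        out.write(new byte[64 * 1024]);
        out.close();
      }
    }
  }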

+ 25 - 354
src/java/org/apache/hadoop/fs/LocalFileSystem.java

@@ -19,365 +19,52 @@
 package org.apache.hadoop.fs;
 
 import java.io.*;
-import java.nio.ByteBuffer;
-import java.util.*;
-import java.nio.channels.*;
 import java.net.URI;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.util.Progressable;
+import java.util.*;
 
 /****************************************************************
- * Implement the FileSystem API for the native filesystem.
+ * Implement the FileSystem API for the checksummed local filesystem.
  *
  * @author Mike Cafarella
  *****************************************************************/
-public class LocalFileSystem extends FileSystem {
+public class LocalFileSystem extends ChecksumFileSystem {
     static final URI NAME = URI.create("file:///");
 
-    private Path workingDir =
-      new Path(System.getProperty("user.dir"));
-    TreeMap sharedLockDataSet = new TreeMap();
-    TreeMap nonsharedLockDataSet = new TreeMap();
-    TreeMap lockObjSet = new TreeMap();
-    // by default use copy/delete instead of rename
-    boolean useCopyForRename = true;
-    
-    public LocalFileSystem() {}
-
-    /**
-     * Return 1x1 'localhost' cell if the file exists.
-     * Return null if otherwise.
-     */
-    public String[][] getFileCacheHints(Path f, long start, long len) throws IOException {
-        if (! exists(f)) {
-            return null;
-        } else {
-            String result[][] = new String[1][];
-            result[0] = new String[1];
-            result[0][0] = "localhost";
-            return result;
-        }
-    }
-
-    /** @deprecated */
-    public String getName() { return "local"; }
-
-    public URI getUri() { return NAME; }
-
-    public void initialize(URI uri, Configuration conf) {
-      setConf(conf);
-    }
-
-    /** Convert a path to a File. */
-    public File pathToFile(Path path) {
-      checkPath(path);
-      if (!path.isAbsolute()) {
-        path = new Path(workingDir, path);
-      }
-      return new File(path.toUri().getPath());
-    }
-
-    /*******************************************************
-     * For open()'s FSInputStream
-     *******************************************************/
-    class LocalFSFileInputStream extends FSInputStream {
-        FileInputStream fis;
-
-        public LocalFSFileInputStream(Path f) throws IOException {
-          this.fis = new FileInputStream(pathToFile(f));
-        }
-
-        public void seek(long pos) throws IOException {
-          fis.getChannel().position(pos);
-        }
-
-        public long getPos() throws IOException {
-          return fis.getChannel().position();
-        }
-
-        public boolean seekToNewSource(long targetPos) throws IOException {
-          return false;
-        }
-
-        /*
-         * Just forward to the fis
-         */
-        public int available() throws IOException { return fis.available(); }
-        public void close() throws IOException { fis.close(); }
-        public boolean markSupport() { return false; }
-
-        public int read() throws IOException {
-          try {
-            return fis.read();
-          } catch (IOException e) {               // unexpected exception
-            throw new FSError(e);                 // assume native fs error
-          }
-        }
-
-        public int read(byte[] b, int off, int len) throws IOException {
-          try {
-            return fis.read(b, off, len);
-          } catch (IOException e) {               // unexpected exception
-            throw new FSError(e);                 // assume native fs error
-          }
-        }
-
-        public int read(long position, byte[] b, int off, int len)
-        throws IOException {
-          ByteBuffer bb = ByteBuffer.wrap(b, off, len);
-          try {
-            return fis.getChannel().read(bb, position);
-          } catch (IOException e) {
-            throw new FSError(e);
-          }
-        }
-        
-        public long skip(long n) throws IOException { return fis.skip(n); }
+    public LocalFileSystem() {
+        super(new RawLocalFileSystem());
     }
     
-    public FSInputStream openRaw(Path f) throws IOException {
-        if (! exists(f)) {
-            throw new FileNotFoundException(f.toString());
-        }
-        return new LocalFSFileInputStream(f);
-    }
-
-    /*********************************************************
-     * For create()'s FSOutputStream.
-     *********************************************************/
-    class LocalFSFileOutputStream extends FSOutputStream {
-      FileOutputStream fos;
-
-      public LocalFSFileOutputStream(Path f) throws IOException {
-        this.fos = new FileOutputStream(pathToFile(f));
-      }
-
-      public long getPos() throws IOException {
-        return fos.getChannel().position();
-      }
-
-      /*
-       * Just forward to the fos
-       */
-      public void close() throws IOException { fos.close(); }
-      public void flush() throws IOException { fos.flush(); }
-
-      public void write(byte[] b, int off, int len) throws IOException {
-        try {
-          fos.write(b, off, len);
-        } catch (IOException e) {               // unexpected exception
-          throw new FSError(e);                 // assume native fs error
-        }
-      }
-      public void write(int b) throws IOException {
-        try {
-          fos.write(b);
-        } catch (IOException e) {               // unexpected exception
-          throw new FSError(e);                 // assume native fs error
-        }
-      }
-    }
-
-    public FSOutputStream createRaw(Path f, boolean overwrite, 
-                                    short replication, long blockSize)
-      throws IOException {
-        if (exists(f) && ! overwrite) {
-            throw new IOException("File already exists:"+f);
-        }
-        Path parent = f.getParent();
-        if (parent != null) {
-          if (!mkdirs(parent)) {
-            throw new IOException("Mkdirs failed to create " + parent.toString());
-          }
-        }
-        return new LocalFSFileOutputStream(f);
-    }
-
-    public FSOutputStream createRaw(Path f, boolean overwrite, 
-                                    short replication, long blockSize,
-                                    Progressable progress)
-      throws IOException {
-        // ignore write-progress reporter for local files
-        return createRaw(f, overwrite, replication, blockSize);
-    }
-    /**
-     * Replication is not supported for the local file system.
-     */
-    public short getReplication(Path f) throws IOException {
-      return 1;
+    public LocalFileSystem( FileSystem rawLocalFileSystem ) {
+        super(rawLocalFileSystem);
     }
     
-    public boolean setReplicationRaw( Path src, 
-                                      short replication
-                                    ) throws IOException {
-      return true;
-    }
-
-    public boolean renameRaw(Path src, Path dst) throws IOException {
-        if (useCopyForRename) {
-          return FileUtil.copy(this, src, this, dst, true, getConf());
-        } else return pathToFile(src).renameTo(pathToFile(dst));
-    }
-
-    public boolean deleteRaw(Path p) throws IOException {
-        File f = pathToFile(p);
-        if (f.isFile()) {
-            return f.delete();
-        } else return FileUtil.fullyDelete(f);
-    }
-
-    public boolean exists(Path f) throws IOException {
-        return pathToFile(f).exists();
+    /** Convert a path to a File. */
+    public File pathToFile(Path path) {
+      return ((RawLocalFileSystem)fs).pathToFile(path);
     }
 
-    public boolean isDirectory(Path f) throws IOException {
-        return pathToFile(f).isDirectory();
+    @Override
+    public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException {
+      FileUtil.copy(this, src, this, dst, delSrc, getConf());
     }
 
-    public long getLength(Path f) throws IOException {
-        return pathToFile(f).length();
+    @Override
+    public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+    throws IOException {
+      FileUtil.copy(this, src, this, dst, delSrc, getConf());
     }
 
-    public Path[] listPathsRaw(Path f) throws IOException {
-        File localf = pathToFile(f);
-        Path[] results;
-
-        if(!localf.exists())
-          return null;
-        else if(localf.isFile()) {
-          results = new Path[1];
-          results[0] = f;
-          return results;
-        } else { //directory
-          String[] names = localf.list();
-          if (names == null) {
-            return null;
-          }
-          results = new Path[names.length];
-          for (int i = 0; i < names.length; i++) {
-            results[i] = new Path(f, names[i]);
-          }
-          return results;
-        }
-    }
-    
-    /**
-     * Creates the specified directory hierarchy. Does not
-     * treat existence as an error.
-     */
-    public boolean mkdirs(Path f) throws IOException {
-      Path parent = f.getParent();
-      File p2f = pathToFile(f);
-      return (parent == null || mkdirs(parent)) &&
-             (p2f.mkdir() || p2f.isDirectory());
-    }
-    
     /**
-     * Set the working directory to the given directory.
+     * Moves files to a bad file directory on the same device, so that their
+     * storage will not be reused.
      */
-    public void setWorkingDirectory(Path newDir) {
-      workingDir = newDir;
-    }
-    
-    public Path getWorkingDirectory() {
-      return workingDir;
-    }
-    
-    /** @deprecated */ @Deprecated
-    public void lock(Path p, boolean shared) throws IOException {
-      File f = pathToFile(p);
-      f.createNewFile();
-
-      if (shared) {
-        FileInputStream lockData = new FileInputStream(f);
-        FileLock lockObj =
-          lockData.getChannel().lock(0L, Long.MAX_VALUE, shared);
-        synchronized (this) {
-          sharedLockDataSet.put(f, lockData);
-          lockObjSet.put(f, lockObj);
-        }
-      } else {
-        FileOutputStream lockData = new FileOutputStream(f);
-        FileLock lockObj = lockData.getChannel().lock(0L, Long.MAX_VALUE, shared);
-        synchronized (this) {
-          nonsharedLockDataSet.put(f, lockData);
-          lockObjSet.put(f, lockObj);
-        }
-      }
-    }
-
-    /** @deprecated */ @Deprecated
-    public void release(Path p) throws IOException {
-      File f = pathToFile(p);
-
-      FileLock lockObj;
-      FileInputStream sharedLockData;
-      FileOutputStream nonsharedLockData;
-      synchronized (this) {
-        lockObj = (FileLock) lockObjSet.remove(f);
-        sharedLockData = (FileInputStream) sharedLockDataSet.remove(f);
-        nonsharedLockData = (FileOutputStream) nonsharedLockDataSet.remove(f);
-      }
- 
-      if (lockObj == null) {
-        throw new IOException("Given target not held as lock");
-      }
-      if (sharedLockData == null && nonsharedLockData == null) {
-        throw new IOException("Given target not held as lock");
-      }
-
-      lockObj.release();
-
-      if (sharedLockData != null) {
-        sharedLockData.close();
-      } else {
-        nonsharedLockData.close();
-      }
-    }
-
-    // In the case of the local filesystem, we can just rename the file.
-    public void moveFromLocalFile(Path src, Path dst) throws IOException {
-      rename(src, dst);
-    }
-
-    // Similar to moveFromLocalFile(), except the source is kept intact.
-    public void copyFromLocalFile(Path src, Path dst) throws IOException {
-      FileUtil.copy(this, src, this, dst, false, getConf());
-    }
-
-    // We can't delete the src file in this case.  Too bad.
-    public void copyToLocalFile(Path src, Path dst, boolean copyCrc) throws IOException {
-      FileUtil.copy(this, src, this, dst, false, copyCrc, getConf());
-    }
-
-    // We can write output directly to the final location
-    public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
-      throws IOException {
-      return fsOutputFile;
-    }
-
-    // It's in the right place - nothing to do.
-    public void completeLocalOutput(Path fsWorkingFile, Path tmpLocalFile)
-      throws IOException {
-    }
-
-    public void close() throws IOException {
-        super.close();
-    }
-
-    public String toString() {
-        return "LocalFS";
-    }
-    
-
-    /** Moves files to a bad file directory on the same device, so that their
-     * storage will not be reused. */
-    public void reportChecksumFailure(Path p, FSInputStream in,
+    public void reportChecksumFailure(Path p, FSDataInputStream in,
                                       long inPos,
-                                      FSInputStream sums, long sumsPos) {
+                                      FSDataInputStream sums, long sumsPos) {
       try {
-        // canonicalize f   
-        File f = pathToFile(p).getCanonicalFile();
+        // canonicalize f
+        File f = ((RawLocalFileSystem)fs).pathToFile(p).getCanonicalFile();
       
         // find highest writable parent dir of f on the same device
         String device = new DF(f, getConf()).getMount();
@@ -402,27 +89,11 @@ public class LocalFileSystem extends FileSystem {
         f.renameTo(badFile);                      // rename it
 
         // move checksum file too
-        File checkFile = pathToFile(getChecksumFile(p));
+        File checkFile = ((RawLocalFileSystem)fs).pathToFile(getChecksumFile(p));
         checkFile.renameTo(new File(badDir, checkFile.getName()+suffix));
 
       } catch (IOException e) {
         LOG.warn("Error moving bad file " + p + ": " + e);
       }
     }
-
-    public long getDefaultBlockSize() {
-      // default to 32MB: large enough to minimize the impact of seeks
-      return getConf().getLong("fs.local.block.size", 32 * 1024 * 1024);
-    }
-
-    public long getBlockSize(Path filename) {
-      // local doesn't really do blocks, so just use the global number
-      return getDefaultBlockSize();
-    }
-    
-    public short getDefaultReplication() {
-      return 1;
-    }
-
-
 }

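The net effect of the hunks above: LocalFileSystem is now a thin checksumming wrapper, and all actual I/O lives in the new RawLocalFileSystem below. Checksums become a property of the wrapping FileSystem object instead of being hard-wired into every stream, which is the point of HADOOP-928. A minimal sketch of the layering, assuming LocalFileSystem now extends the ChecksumFileSystem added by this commit (its getChecksumFile()/getRawFileSystem() calls are visible in the hunks and tests) and that FileSystem.getLocal() returns the checksummed flavor:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class RawVsChecksummed {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);       // checksummed LocalFileSystem
        FileSystem raw = ((ChecksumFileSystem) fs).getRawFileSystem();

        Path p = new Path("/tmp/demo");
        FSDataOutputStream out = fs.create(p);           // also writes /tmp/.demo.crc
        out.writeBytes("hello\n");
        out.close();

        FSDataInputStream in = raw.open(p);              // no CRC lookup, no verification
        System.out.println((char) in.read());            // 'h'
        in.close();
      }
    }

Writing through the wrapper produces the hidden .demo.crc sibling; reading through the raw layer skips it entirely.
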
+ 376 - 0
src/java/org/apache/hadoop/fs/RawLocalFileSystem.java

@@ -0,0 +1,376 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.*;
+import java.net.URI;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileLock;
+import java.util.*;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Progressable;
+
+/****************************************************************
+ * Implement the FileSystem API for the raw local filesystem.
+ *
+ * @author Mike Cafarella
+ *****************************************************************/
+public class RawLocalFileSystem extends FileSystem {
+  static final URI NAME = URI.create("file:///");
+  private Path workingDir =
+    new Path(System.getProperty("user.dir"));
+  TreeMap<File, FileInputStream> sharedLockDataSet = new TreeMap<File, FileInputStream>();
+  TreeMap<File, FileOutputStream> nonsharedLockDataSet = new TreeMap<File, FileOutputStream>();
+  TreeMap<File, FileLock> lockObjSet = new TreeMap<File, FileLock>();
+  // by default use copy/delete instead of rename
+  boolean useCopyForRename = true;
+  
+  public RawLocalFileSystem() {}
+  
+  /** Convert a path to a File. */
+  public File pathToFile(Path path) {
+    checkPath(path);
+    if (!path.isAbsolute()) {
+      path = new Path(getWorkingDirectory(), path);
+    }
+    return new File(path.toUri().getPath());
+  }
+
+  /**
+   * Return 1x1 'localhost' cell if the file exists.
+   * Return null otherwise.
+   */
+  public String[][] getFileCacheHints(Path f, long start, long len) throws IOException {
+    if (! exists(f)) {
+      return null;
+    } else {
+      String result[][] = new String[1][];
+      result[0] = new String[1];
+      result[0][0] = "localhost";
+      return result;
+    }
+  }
+  
+  /** @deprecated */ @Deprecated
+  public String getName() { return "local"; }
+  
+  public URI getUri() { return NAME; }
+  
+  public void initialize(URI uri, Configuration conf) {
+    setConf(conf);
+  }
+  
+  /*******************************************************
+   * For open()'s FSInputStream
+   *******************************************************/
+  class LocalFSFileInputStream extends FSInputStream {
+    FileInputStream fis;
+    
+    public LocalFSFileInputStream(Path f) throws IOException {
+      this.fis = new FileInputStream(pathToFile(f));
+    }
+    
+    public void seek(long pos) throws IOException {
+      fis.getChannel().position(pos);
+    }
+    
+    public long getPos() throws IOException {
+      return fis.getChannel().position();
+    }
+    
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      return false;
+    }
+    
+    /*
+     * Just forward to the fis
+     */
+    public int available() throws IOException { return fis.available(); }
+    public void close() throws IOException { fis.close(); }
+    public boolean markSupported() { return false; }
+    
+    public int read() throws IOException {
+      try {
+        return fis.read();
+      } catch (IOException e) {                 // unexpected exception
+        throw new FSError(e);                   // assume native fs error
+      }
+    }
+    
+    public int read(byte[] b, int off, int len) throws IOException {
+      try {
+        return fis.read(b, off, len);
+      } catch (IOException e) {                 // unexpected exception
+        throw new FSError(e);                   // assume native fs error
+      }
+    }
+    
+    public int read(long position, byte[] b, int off, int len)
+    throws IOException {
+      ByteBuffer bb = ByteBuffer.wrap(b, off, len);
+      try {
+        return fis.getChannel().read(bb, position);
+      } catch (IOException e) {
+        throw new FSError(e);
+      }
+    }
+    
+    public long skip(long n) throws IOException { return fis.skip(n); }
+  }
+  
+  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+    if (!exists(f)) {
+      throw new FileNotFoundException(f.toString());
+    }
+    return new FSDataInputStream(new LocalFSFileInputStream(f), bufferSize);
+  }
+  
+  /*********************************************************
+   * For create()'s FSOutputStream.
+   *********************************************************/
+  class LocalFSFileOutputStream extends OutputStream {
+    FileOutputStream fos;
+    
+    public LocalFSFileOutputStream(Path f) throws IOException {
+      this.fos = new FileOutputStream(pathToFile(f));
+    }
+    
+    public long getPos() throws IOException {
+      return fos.getChannel().position();
+    }
+    
+    /*
+     * Just forward to the fos
+     */
+    public void close() throws IOException { fos.close(); }
+    public void flush() throws IOException { fos.flush(); }
+    public void write(byte[] b, int off, int len) throws IOException {
+      try {
+        fos.write(b, off, len);
+      } catch (IOException e) {                // unexpected exception
+        throw new FSError(e);                  // assume native fs error
+      }
+    }
+    
+    public void write(int b) throws IOException {
+      try {
+        fos.write(b);
+      } catch (IOException e) {              // unexpected exception
+        throw new FSError(e);                // assume native fs error
+      }
+    }
+  }
+  
+  public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize,
+      short replication, long blockSize, Progressable progress)
+  throws IOException {
+    if (exists(f) && ! overwrite) {
+      throw new IOException("File already exists:"+f);
+    }
+    Path parent = f.getParent();
+    if (parent != null && !mkdirs(parent)) {
+      throw new IOException("Mkdirs failed to create " + parent.toString());
+    }
+    return new FSDataOutputStream(new LocalFSFileOutputStream(f), getConf());
+  }
+  
+  /**
+   * Replication is not supported for the local file system.
+   */
+  public short getReplication(Path f) throws IOException {
+    return 1;
+  }
+  
+  /** Set the replication of the given file */
+  public boolean setReplication( Path src,
+      short replication
+  ) throws IOException {
+    return true;
+  }
+  
+  public boolean rename(Path src, Path dst) throws IOException {
+    if (useCopyForRename) {
+      return FileUtil.copy(this, src, this, dst, true, getConf());
+    } else return pathToFile(src).renameTo(pathToFile(dst));
+  }
+  
+  public boolean delete(Path p) throws IOException {
+    File f = pathToFile(p);
+    if (f.isFile()) {
+      return f.delete();
+    } else return FileUtil.fullyDelete(f);
+  }
+  
+  public boolean exists(Path f) throws IOException {
+    return pathToFile(f).exists();
+  }
+  
+  public boolean isDirectory(Path f) throws IOException {
+    return pathToFile(f).isDirectory();
+  }
+  
+  public long getLength(Path f) throws IOException {
+    return pathToFile(f).length();
+  }
+  
+  public Path[] listPaths(Path f) throws IOException {
+    File localf = pathToFile(f);
+    Path[] results;
+    
+    if(!localf.exists())
+      return null;
+    else if(localf.isFile()) {
+      results = new Path[1];
+      results[0] = f;
+      return results;
+    } else { // directory
+      String[] names = localf.list();
+      if (names == null) {
+        return null;
+      }
+      results = new Path[names.length];
+      for (int i = 0; i < names.length; i++) {
+        results[i] = new Path(f, names[i]);
+      }
+      return results;
+    }
+  }
+  
+  /**
+   * Creates the specified directory hierarchy. Does not
+   * treat existence as an error.
+   */
+  public boolean mkdirs(Path f) throws IOException {
+    Path parent = f.getParent();
+    File p2f = pathToFile(f);
+    return (parent == null || mkdirs(parent)) &&
+    (p2f.mkdir() || p2f.isDirectory());
+  }
+  
+  /**
+   * Set the working directory to the given directory.
+   */
+  @Override
+  public void setWorkingDirectory(Path newDir) {
+    workingDir = newDir;
+  }
+  
+  @Override
+  public Path getWorkingDirectory() {
+    return workingDir;
+  }
+  
+  /** @deprecated */ @Deprecated
+  public void lock(Path p, boolean shared) throws IOException {
+    File f = pathToFile(p);
+    f.createNewFile();
+    
+    if (shared) {
+      FileInputStream lockData = new FileInputStream(f);
+      FileLock lockObj =
+        lockData.getChannel().lock(0L, Long.MAX_VALUE, shared);
+      synchronized (this) {
+        sharedLockDataSet.put(f, lockData);
+        lockObjSet.put(f, lockObj);
+      }
+    } else {
+      FileOutputStream lockData = new FileOutputStream(f);
+      FileLock lockObj = lockData.getChannel().lock(0L, Long.MAX_VALUE, shared);
+      synchronized (this) {
+        nonsharedLockDataSet.put(f, lockData);
+        lockObjSet.put(f, lockObj);
+      }
+    }
+  }
+  
+  /** @deprecated */ @Deprecated
+  public void release(Path p) throws IOException {
+    File f = pathToFile(p);
+    
+    FileLock lockObj;
+    FileInputStream sharedLockData;
+    FileOutputStream nonsharedLockData;
+    synchronized (this) {
+      lockObj = lockObjSet.remove(f);
+      sharedLockData = sharedLockDataSet.remove(f);
+      nonsharedLockData = nonsharedLockDataSet.remove(f);
+    }
+    
+    if (lockObj == null) {
+      throw new IOException("Given target not held as lock");
+    }
+    if (sharedLockData == null && nonsharedLockData == null) {
+      throw new IOException("Given target not held as lock");
+    }
+    
+    lockObj.release();
+    
+    if (sharedLockData != null) {
+      sharedLockData.close();
+    } else {
+      nonsharedLockData.close();
+    }
+  }
+  
+  // In the case of the local filesystem, we can just rename the file.
+  public void moveFromLocalFile(Path src, Path dst) throws IOException {
+    rename(src, dst);
+  }
+  
+  @Override
+  public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
+  throws IOException {
+    FileUtil.copy(this, src, this, dst, delSrc, getConf());
+  }
+  
+  @Override
+  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
+  throws IOException {
+    FileUtil.copy(this, src, this, dst, delSrc, getConf());
+  }
+  
+  // We can write output directly to the final location
+  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
+  throws IOException {
+    return fsOutputFile;
+  }
+  
+  // It's in the right place - nothing to do.
+  public void completeLocalOutput(Path fsWorkingFile, Path tmpLocalFile)
+  throws IOException {
+  }
+  
+  public void close() throws IOException {
+    super.close();
+  }
+  
+  public String toString() {
+    return "LocalFS";
+  }
+  
+  public long getBlockSize(Path filename) {
+    // local doesn't really do blocks, so just use the global number
+    return getDefaultBlockSize();
+  }
+  
+  public short getDefaultReplication() {
+    return 1;
+  }
+}

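One subtlety worth noting in RawLocalFileSystem: the positional read(long, byte[], int, int) goes through FileChannel#read(ByteBuffer, long), which reads at an absolute offset without moving the stream's own position, so seek()/getPos() state is preserved across pread calls. A standalone illustration of that JDK behavior (plain java.io/java.nio, nothing Hadoop-specific):

    import java.io.*;
    import java.nio.ByteBuffer;

    public class PreadDemo {
      public static void main(String[] args) throws IOException {
        File f = File.createTempFile("pread", null);
        FileOutputStream fos = new FileOutputStream(f);
        fos.write("0123456789".getBytes());
        fos.close();

        FileInputStream fis = new FileInputStream(f);
        System.out.println((char) fis.read());        // '0', position now 1

        ByteBuffer bb = ByteBuffer.allocate(4);
        fis.getChannel().read(bb, 6);                 // absolute read at offset 6
        System.out.println(new String(bb.array()));   // "6789"

        System.out.println((char) fis.read());        // '1': position untouched
        fis.close();
      }
    }
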
+ 22 - 39
src/java/org/apache/hadoop/fs/s3/S3FileSystem.java

@@ -8,8 +8,8 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
@@ -152,7 +152,7 @@ public class S3FileSystem extends FileSystem {
   }
 
   @Override
-  public Path[] listPathsRaw(Path path) throws IOException {
+  public Path[] listPaths(Path path) throws IOException {
     Path absolutePath = makeAbsolute(path);
     INode inode = store.retrieveINode(absolutePath);
     if (inode == null) {
@@ -166,21 +166,14 @@ public class S3FileSystem extends FileSystem {
   }
 
   @Override
-  public FSOutputStream createRaw(Path file, boolean overwrite,
-      short replication, long blockSize) throws IOException {
-
-    return createRaw(file, overwrite, replication, blockSize, null);
-  }
-
-  @Override
-  public FSOutputStream createRaw(Path file, boolean overwrite,
+  public FSDataOutputStream create(Path file, boolean overwrite, int bufferSize,
       short replication, long blockSize, Progressable progress)
       throws IOException {
 
     INode inode = store.retrieveINode(makeAbsolute(file));
     if (inode != null) {
       if (overwrite) {
-        deleteRaw(file);
+        delete(file);
       } else {
         throw new IOException("File already exists: " + file);
       }
@@ -192,18 +185,20 @@ public class S3FileSystem extends FileSystem {
         }
       }      
     }
-    return new S3OutputStream(getConf(), store, makeAbsolute(file),
-        blockSize, progress);
+    return new FSDataOutputStream( 
+            new S3OutputStream(getConf(), store, makeAbsolute(file),
+                blockSize, progress), bufferSize );
   }
 
   @Override
-  public FSInputStream openRaw(Path path) throws IOException {
+  public FSDataInputStream open(Path path, int bufferSize) throws IOException {
     INode inode = checkFile(path);
-    return new S3InputStream(getConf(), store, inode);
+    return new FSDataInputStream( new S3InputStream(getConf(), store, inode),
+            bufferSize);
   }
 
   @Override
-  public boolean renameRaw(Path src, Path dst) throws IOException {
+  public boolean rename(Path src, Path dst) throws IOException {
     Path absoluteSrc = makeAbsolute(src);
     INode srcINode = store.retrieveINode(absoluteSrc);
     if (srcINode == null) {
@@ -228,10 +223,10 @@ public class S3FileSystem extends FileSystem {
         return false;
       }
     }
-    return renameRawRecursive(absoluteSrc, absoluteDst);
+    return renameRecursive(absoluteSrc, absoluteDst);
   }
   
-  private boolean renameRawRecursive(Path src, Path dst) throws IOException {
+  private boolean renameRecursive(Path src, Path dst) throws IOException {
     INode srcINode = store.retrieveINode(src);
     store.storeINode(dst, srcINode);
     store.deleteINode(src);
@@ -250,7 +245,7 @@ public class S3FileSystem extends FileSystem {
   }
 
   @Override
-  public boolean deleteRaw(Path path) throws IOException {
+  public boolean delete(Path path) throws IOException {
     Path absolutePath = makeAbsolute(path);
     INode inode = store.retrieveINode(absolutePath);
     if (inode == null) {
@@ -262,12 +257,12 @@ public class S3FileSystem extends FileSystem {
         store.deleteBlock(block);
       }
     } else {
-      Path[] contents = listPathsRaw(absolutePath);
+      Path[] contents = listPaths(absolutePath);
       if (contents == null) {
         return false;
       }
       for (Path p : contents) {
-        if (! deleteRaw(p)) {
+        if (! delete(p)) {
           return false;
         }
       }
@@ -305,7 +300,7 @@ public class S3FileSystem extends FileSystem {
    * us.
    */
   @Override
-  public boolean setReplicationRaw(Path path, short replication)
+  public boolean setReplication(Path path, short replication)
       throws IOException {
     return true;
   }
@@ -354,25 +349,13 @@ public class S3FileSystem extends FileSystem {
   }
 
   @Override
-  public void reportChecksumFailure(Path f, 
-                                    FSInputStream in, long inPos, 
-                                    FSInputStream sums, long sumsPos) {
-    // TODO: What to do here?
-  }
-
-  @Override
-  public void moveFromLocalFile(Path src, Path dst) throws IOException {
-    FileUtil.copy(localFs, src, this, dst, true, getConf());
-  }
-
-  @Override
-  public void copyFromLocalFile(Path src, Path dst) throws IOException {
-    FileUtil.copy(localFs, src, this, dst, false, true, getConf());
+  public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
+    FileUtil.copy(localFs, src, this, dst, delSrc, getConf());
   }
 
   @Override
-  public void copyToLocalFile(Path src, Path dst, boolean copyCrc) throws IOException {
-    FileUtil.copy(this, src, localFs, dst, false, copyCrc, getConf());
+  public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
+    FileUtil.copy(this, src, localFs, dst, delSrc, getConf());
   }
 
   @Override

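S3FileSystem now extends the plain FileSystem base class and hands back FSDataOutputStream/FSDataInputStream directly, so it carries no client-side checksums; a deployment that wants them would have to layer a checksumming wrapper on top. A sketch of the updated entry points, using the five-argument create and two-argument open seen in this patch (the bucket URI is a placeholder):

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;

    public class S3Smoke {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem s3 = FileSystem.get(URI.create("s3://example-bucket"), conf);

        Path p = new Path("/demo/part-0");
        FSDataOutputStream out =
            s3.create(p, true, 4096, (short) 1, 64L * 1024 * 1024);
        out.write(new byte[] { 1, 2, 3 });
        out.close();

        FSDataInputStream in = s3.open(p, 4096);
        System.out.println(in.read());               // 1
        in.close();
      }
    }
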
+ 1 - 3
src/java/org/apache/hadoop/fs/s3/S3OutputStream.java

@@ -9,12 +9,11 @@ import java.util.List;
 import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.s3.INode.FileType;
 import org.apache.hadoop.util.Progressable;
 
-class S3OutputStream extends FSOutputStream {
+class S3OutputStream extends OutputStream {
 
   private Configuration conf;
   
@@ -70,7 +69,6 @@ class S3OutputStream extends FSOutputStream {
     return result;
   }
 
-  @Override
   public long getPos() throws IOException {
     return filePos;
   }

+ 2 - 1
src/java/org/apache/hadoop/mapred/MapOutputLocation.java

@@ -206,7 +206,8 @@ class MapOutputLocation implements Writable, MRConstants {
       
       int length = connection.getContentLength();
       int inMemFSSize = inMemFileSys.getFSSize();
-      int checksumLength = inMemFileSys.getChecksumFileLength(length);
+      int checksumLength = (int)inMemFileSys.getChecksumFileLength(
+              localFilename, length);
         
       boolean createInMem = false; 
       if (inMemFSSize > 0)  

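getChecksumFileLength() now takes the file name as well as the size, since checksum overhead is a per-FileSystem property rather than a global constant. As a rough back-of-envelope (hypothetical numbers; the real layout is defined by ChecksumFileSystem and includes a small header that this sketch ignores), one 4-byte CRC32 is kept per io.bytes.per.checksum bytes of data:

    long dataLen = 10L * 1024 * 1024;                  // a 10 MB map output
    int bytesPerSum = 512;                             // io.bytes.per.checksum default
    long crcBytes = ((dataLen + bytesPerSum - 1) / bytesPerSum) * 4;
    System.out.println(crcBytes);                      // 81920 bytes, under 1% overhead
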
+ 34 - 167
src/java/org/apache/hadoop/mapred/PhasedFileSystem.java

@@ -1,17 +1,16 @@
 package org.apache.hadoop.mapred;
 
 import java.io.IOException;
+import java.io.OutputStream;
 import java.util.HashMap;
 import java.util.Map;
 import java.net.URI;
-import java.net.URISyntaxException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.Progressable;
 
@@ -27,13 +26,9 @@ import org.apache.hadoop.util.Progressable;
  * better to commit(Path) individual files when done. Otherwise
  * commit() can be used to commit all open files at once. 
  */
-public class PhasedFileSystem extends FileSystem {
-
-  private FileSystem baseFS ;
-  private URI uri;
-
+public class PhasedFileSystem extends FilterFileSystem {
   // Map from final file name to temporary file name
-  private Map<Path, FileInfo> finalNameToFileInfo = new HashMap(); 
+  private Map<Path, FileInfo> finalNameToFileInfo = new HashMap<Path, FileInfo>(); 
   
   private String jobid ; 
   private String tipid ; 
@@ -50,12 +45,12 @@ public class PhasedFileSystem extends FileSystem {
    */
   public PhasedFileSystem(FileSystem fs, String jobid, 
       String tipid, String taskid) {
-    this.baseFS = fs ; 
+    super(fs); 
     this.jobid = jobid; 
     this.tipid = tipid ; 
     this.taskid = taskid ; 
     
-    tempDir = new Path(baseFS.getConf().get("mapred.system.dir") ); 
+    tempDir = new Path(fs.getConf().get("mapred.system.dir") ); 
     this.setConf(fs.getConf());
   }
   /**
@@ -65,21 +60,14 @@ public class PhasedFileSystem extends FileSystem {
    * @param conf JobConf
    */
   public PhasedFileSystem(FileSystem fs, JobConf conf) {
-    this.baseFS = fs ; 
+    super(fs); 
     this.jobid = conf.get("mapred.job.id"); 
     this.tipid = conf.get("mapred.tip.id"); 
     this.taskid = conf.get("mapred.task.id") ; 
     
-    tempDir = new Path(baseFS.getConf().get("mapred.system.dir") );
+    tempDir = new Path(fs.getConf().get("mapred.system.dir") );
     this.setConf(fs.getConf());
   }
-  /**
-   * This Constructor should not be used in this or any derived class. 
-   * @param conf
-   */
-  protected PhasedFileSystem(Configuration conf){
-    throw new UnsupportedOperationException("Operation not supported"); 
-  }
   
   private Path setupFile(Path finalFile, boolean overwrite) throws IOException{
     if( finalNameToFileInfo.containsKey(finalFile) ){
@@ -94,8 +82,8 @@ public class PhasedFileSystem extends FileSystem {
         }catch(IOException ioe){
           // ignore if already closed
         }
-        if( baseFS.exists(fInfo.getTempPath())){
-          baseFS.delete( fInfo.getTempPath() );
+        if( fs.exists(fInfo.getTempPath())){
+          fs.delete( fInfo.getTempPath() );
         }
         finalNameToFileInfo.remove(finalFile); 
       }
@@ -111,45 +99,19 @@ public class PhasedFileSystem extends FileSystem {
     return tempPath ; 
   }
   
-  public URI getUri() {
-    return baseFS.getUri();
-  }
-
-  public void initialize(URI uri, Configuration conf) throws IOException {
-    baseFS.initialize(uri, conf);
-  }
-
-  @Override
-  public FSOutputStream createRaw(
-      Path f, boolean overwrite, short replication, long blockSize)
+  public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize,
+          short replication, long blockSize, Progressable progress)
       throws IOException {
-    
-    // for reduce output its checked in job client but lets check it anyways
-    // as tasks with side effect may write to locations not set in jobconf
-    // as output path. 
-    if( baseFS.exists(f) && !overwrite ){
-      throw new IOException("Error creating file - already exists : " + f); 
-    }
-    FSOutputStream stream = 
-      baseFS.createRaw(setupFile(f, overwrite), overwrite, replication, blockSize); 
-    finalNameToFileInfo.get(f).setOpenFileStream(stream); 
-    return stream; 
-  }
-
-  @Override
-  public FSOutputStream createRaw(
-      Path f, boolean overwrite, short replication, long blockSize,
-      Progressable progress)
-      throws IOException {
-    if( baseFS.exists(f) && !overwrite ){
+    if( fs.exists(f) && !overwrite ){
       throw new IOException("Error creating file - already exists : " + f); 
     }
-    FSOutputStream stream = 
-      baseFS.createRaw(setupFile(f, overwrite), overwrite, replication, 
+    FSDataOutputStream stream = 
+      fs.create(setupFile(f, overwrite), overwrite, bufferSize, replication, 
           blockSize, progress);
     finalNameToFileInfo.get(f).setOpenFileStream(stream); 
     return stream ; 
   }
+  
   /**
   * Commits a single file to its final location as passed in the create* methods.
   * If a file already exists in the final location, the temporary file is deleted.
@@ -177,29 +139,29 @@ public class PhasedFileSystem extends FileSystem {
     Path tempPath = fInfo.getTempPath(); 
     // ignore .crc files 
     if(! tempPath.toString().endsWith(".crc")){
-      if( !baseFS.exists(fPath) || fInfo.isOverwrite()){
-        if(! baseFS.exists(fPath.getParent())){
-          baseFS.mkdirs(fPath.getParent());
+      if( !fs.exists(fPath) || fInfo.isOverwrite()){
+        if(!fs.exists(fPath.getParent())){
+          fs.mkdirs(fPath.getParent());
         }
         
-        if( baseFS.exists(fPath) && fInfo.isOverwrite()){
-          baseFS.delete(fPath); 
+        if( fs.exists(fPath) && fInfo.isOverwrite()){
+          fs.delete(fPath); 
         }
         
         try {
-          if( ! baseFS.rename(fInfo.getTempPath(), fPath) ){
+          if( !fs.rename(fInfo.getTempPath(), fPath) ){
             // delete the temp file if rename failed
-            baseFS.delete(fInfo.getTempPath());
+            fs.delete(fInfo.getTempPath());
           }
         }catch(IOException ioe){
           // rename failed, log error and delete temp files
           LOG.error("PhasedFileSystem failed to commit file : " + fPath 
               + " error : " + ioe.getMessage()); 
-          baseFS.delete(fInfo.getTempPath());
+          fs.delete(fInfo.getTempPath());
         }
       }else{
         // delete temp file
-        baseFS.delete(fInfo.getTempPath());
+        fs.delete(fInfo.getTempPath());
       }
       // done with the file
       if( removeFromMap ){
@@ -241,7 +203,7 @@ public class PhasedFileSystem extends FileSystem {
       }catch(IOException ioe){
         // ignore if already closed
       }
-      baseFS.delete(fInfo.getTempPath()); 
+      fs.delete(fInfo.getTempPath()); 
       if( removeFromMap ){
         finalNameToFileInfo.remove(p);
       }
@@ -261,24 +223,9 @@ public class PhasedFileSystem extends FileSystem {
     // safe to clean now
     finalNameToFileInfo.clear();
   }
-  /**
-   * Closes base file system. 
-   */
-  public void close() throws IOException { 
-    baseFS.close(); 
-  } 
   
   @Override
-  public short getReplication(
-      Path src)
-      throws IOException {
-    // keep replication same for temp file as for 
-    // final file. 
-    return baseFS.getReplication(src);
-  }
-
-  @Override
-  public boolean setReplicationRaw(
+  public boolean setReplication(
       Path src, short replication)
       throws IOException {
     // throw IOException for interface compatibility with 
@@ -287,59 +234,19 @@ public class PhasedFileSystem extends FileSystem {
   }
 
   @Override
-  public boolean renameRaw(
+  public boolean rename(
       Path src, Path dst)
       throws IOException {
     throw new UnsupportedOperationException("Operation not supported");  
   }
 
   @Override
-  public boolean deleteRaw(
+  public boolean delete(
       Path f)
       throws IOException {
     throw new UnsupportedOperationException("Operation not supported");  
   }
 
-  @Override
-  public boolean exists(Path f)
-      throws IOException {
-    return baseFS.exists(f);
-  }
-
-  @Override
-  public boolean isDirectory(Path f)
-      throws IOException {
-    return baseFS.isDirectory(f);  
-  }
-
-  @Override
-  public long getLength(Path f)
-      throws IOException {
-    return baseFS.getLength(f); 
-  }
-
-  @Override
-  public Path[] listPathsRaw(Path f)
-      throws IOException {
-    return baseFS.listPathsRaw(f);
-  }
-
-  @Override
-  public void setWorkingDirectory(Path new_dir) {
-    baseFS.setWorkingDirectory(new_dir);   
-  }
-
-  @Override
-  public Path getWorkingDirectory() {
-    return baseFS.getWorkingDirectory();  
-  }
-
-  @Override
-  public boolean mkdirs(Path f)
-      throws IOException {
-    return baseFS.mkdirs(f) ;
-  }
-
   /** @deprecated */ @Deprecated
   @Override
   public void lock(
@@ -358,21 +265,14 @@ public class PhasedFileSystem extends FileSystem {
 
   @Override
   public void copyFromLocalFile(
-      Path src, Path dst)
-      throws IOException {
-    throw new UnsupportedOperationException("Operation not supported");  
-  }
-
-  @Override
-  public void moveFromLocalFile(
-      Path src, Path dst)
+      boolean delSrc, Path src, Path dst)
       throws IOException {
     throw new UnsupportedOperationException("Operation not supported");  
   }
 
   @Override
   public void copyToLocalFile(
-      Path src, Path dst, boolean copyCrc)
+      boolean delSrc, Path src, Path dst)
       throws IOException {
     throw new UnsupportedOperationException("Operation not supported");  
   }
@@ -391,31 +291,6 @@ public class PhasedFileSystem extends FileSystem {
     throw new UnsupportedOperationException("Operation not supported");  
  }
 
-  @Override
-
-  public void reportChecksumFailure(Path f, 
-                                    FSInputStream in, long inPos, 
-                                    FSInputStream sums, long sumsPos) {
-    baseFS.reportChecksumFailure(f, in, inPos, sums, sumsPos); 
-  }
-
-  @Override
-  public long getBlockSize(
-      Path f)
-      throws IOException {
-    return baseFS.getBlockSize(f);
-  }
-
-  @Override
-  public long getDefaultBlockSize() {
-    return baseFS.getDefaultBlockSize();
-  }
-
-  @Override
-  public short getDefaultReplication() {
-    return baseFS.getDefaultReplication();
-  }
-
   @Override
   public String[][] getFileCacheHints(
       Path f, long start, long len)
@@ -428,16 +303,10 @@ public class PhasedFileSystem extends FileSystem {
     throw new UnsupportedOperationException("Operation not supported");  
   }
 
-  @Override
-  public FSInputStream openRaw(Path f)
-      throws IOException {
-    return baseFS.openRaw(f);   
-  }
-  
   private class FileInfo {
     private Path tempPath ;
     private Path finalPath ; 
-    private FSOutputStream openFileStream ; 
+    private OutputStream openFileStream ; 
     private boolean overwrite ;
     
     FileInfo(Path tempPath, Path finalPath, boolean overwrite){
@@ -445,11 +314,11 @@ public class PhasedFileSystem extends FileSystem {
       this.finalPath = finalPath ; 
       this.overwrite = overwrite; 
     }
-    public FSOutputStream getOpenFileStream() {
+    public OutputStream getOpenFileStream() {
       return openFileStream;
     }
     public void setOpenFileStream(
-        FSOutputStream openFileStream) {
+        OutputStream openFileStream) {
       this.openFileStream = openFileStream;
     }
     public Path getFinalPath() {
@@ -473,7 +342,5 @@ public class PhasedFileSystem extends FileSystem {
         Path tempPath) {
       this.tempPath = tempPath;
     }
-    
   }
-
 }

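PhasedFileSystem keeps its temp-then-commit contract; it just inherits all the pass-through plumbing from FilterFileSystem now instead of spelling it out. A sketch of the intended lifecycle, assuming the commit(Path) described in the class javadoc (the ids below are placeholders normally taken from the task's JobConf, and mapred.system.dir must be set):

    import org.apache.hadoop.fs.*;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.PhasedFileSystem;

    public class PhasedWrite {
      public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();            // normally the task's own JobConf
        FileSystem base = FileSystem.get(conf);
        PhasedFileSystem pfs =
            new PhasedFileSystem(base, "job_0001", "tip_0001", "task_0001");

        Path out = new Path("/user/demo/part-00000");
        FSDataOutputStream stream =
            pfs.create(out, true, 4096, (short) 3, 64L * 1024 * 1024, null);
        stream.writeBytes("side-effect output\n");
        stream.close();

        pfs.commit(out);   // rename the temp file into place; commit() flushes all
      }
    }
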
+ 4 - 6
src/java/org/apache/hadoop/util/CopyFiles.java

@@ -554,11 +554,9 @@ public class CopyFiles extends ToolBase {
         BufferedInputStream is = 
           new BufferedInputStream(connection.getInputStream());
         
-        FSDataOutputStream os = 
-          new FSDataOutputStream(destFileSys, destinationPath, true, 
-              jobConf,	bufferSize, (short)jobConf.getInt("dfs.replication", 3), 
-              jobConf.getLong("dfs.block.size", 67108864)
-          );
+        FSDataOutputStream os = destFileSys.create(destinationPath, true, 
+              bufferSize, (short)jobConf.getInt("dfs.replication", 3), 
+              jobConf.getLong("dfs.block.size", 67108864));
         
         int readBytes = 0;
         while((readBytes = is.read(buffer, 0, bufferSize)) != -1) {
@@ -624,7 +622,7 @@ public class CopyFiles extends ToolBase {
           HDFS.equalsIgnoreCase(srcListURIScheme)) {
       FileSystem fs = FileSystem.get(srcListURI, conf);
       fis = new BufferedReader(
-          new InputStreamReader(new FSDataInputStream(fs, new Path(srcListURIPath), conf))
+          new InputStreamReader(fs.open(new Path(srcListURIPath)))
           );
     } else if("http".equalsIgnoreCase(srcListURIScheme)) {
       //Copy the file 

+ 2 - 2
src/test/org/apache/hadoop/dfs/ClusterTestDFS.java

@@ -23,13 +23,13 @@ import junit.framework.AssertionFailedError;
 
 import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.conf.Configuration;
 
 import java.io.File;
 import java.io.FilenameFilter;
+import java.io.OutputStream;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.ListIterator;
@@ -259,7 +259,7 @@ public class ClusterTestDFS extends TestCase implements FSConstants {
         for (int iFileNumber = 0; iFileNumber < numFiles; iFileNumber++) {
           testFileName = new UTF8("/f" + iFileNumber);
           testfilesList.add(testFileName);
-          FSOutputStream nos = dfsClient.create(testFileName, false);
+          OutputStream nos = dfsClient.create(testFileName, false);
           try {
             for (long nBytesWritten = 0L;
                  nBytesWritten < nBytes;

+ 2 - 2
src/test/org/apache/hadoop/dfs/ClusterTestDFSNamespaceLogging.java

@@ -23,7 +23,6 @@ import junit.framework.AssertionFailedError;
 
 import org.apache.commons.logging.*;
 
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.conf.Configuration;
@@ -34,6 +33,7 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 
@@ -223,7 +223,7 @@ public class ClusterTestDFSNamespaceLogging extends TestCase implements FSConsta
     //
     byte[] buffer = new byte[BUFFER_SIZE];
     UTF8 testFileName = new UTF8(filename); // hardcode filename
-    FSOutputStream nos;
+    OutputStream nos;
 	nos = dfsClient.create(testFileName, false);
     try {
       for (long nBytesWritten = 0L;

+ 9 - 9
src/test/org/apache/hadoop/dfs/NNBench.java

@@ -22,8 +22,8 @@ import java.io.IOException;
 import java.util.Date;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
@@ -78,13 +78,13 @@ public class NNBench {
      */
     static int createWrite() {
       int exceptions = 0;
-      FSOutputStream out = null;
+      FSDataOutputStream out = null;
       boolean success = false;
       for (int index = 0; index < numFiles; index++) {
        do { // create file until it succeeds
           try {
-              out = fileSys.createRaw(
-              new Path(taskDir, "" + index), false, (short)1, bytesPerBlock);
+              out = fileSys.create(
+              new Path(taskDir, "" + index), false, 512, (short)1, bytesPerBlock);
             success = true;
           } catch (IOException ioe) { success=false; exceptions++; }
         } while (!success);
@@ -115,10 +115,10 @@ public class NNBench {
      */
     static int openRead() {
       int exceptions = 0;
-      FSInputStream in = null;
+      FSDataInputStream in = null;
       for (int index = 0; index < numFiles; index++) {
         try {
-          in = fileSys.openRaw(new Path(taskDir, "" + index));
+          in = fileSys.open(new Path(taskDir, "" + index), 512);
           long toBeRead = bytesPerFile;
           while (toBeRead > 0) {
             int nbytes = (int) Math.min(buffer.length, toBeRead);
@@ -149,7 +149,7 @@ public class NNBench {
       for (int index = 0; index < numFiles; index++) {
        do { // rename file until it succeeds
           try {
-            boolean result = fileSys.renameRaw(
+            boolean result = fileSys.rename(
               new Path(taskDir, "" + index), new Path(taskDir, "A" + index));
             success = true;
           } catch (IOException ioe) { success=false; exceptions++; }
@@ -170,7 +170,7 @@ public class NNBench {
       for (int index = 0; index < numFiles; index++) {
        do { // delete file until it succeeds
           try {
-            boolean result = fileSys.deleteRaw(new Path(taskDir, "A" + index));
+            boolean result = fileSys.delete(new Path(taskDir, "A" + index));
             success = true;
           } catch (IOException ioe) { success=false; exceptions++; }
         } while (!success);

+ 4 - 5
src/test/org/apache/hadoop/dfs/TestCheckpoint.java

@@ -20,10 +20,8 @@ package org.apache.hadoop.dfs;
 import junit.framework.TestCase;
 import java.io.*;
 import java.util.Random;
-import java.net.*;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
@@ -39,8 +37,9 @@ public class TestCheckpoint extends TestCase {
 
   private void writeFile(FileSystem fileSys, Path name, int repl)
   throws IOException {
-    FSOutputStream stm = fileSys.createRaw(name, true, (short)repl,
-        (long)blockSize);
+    FSDataOutputStream stm = fileSys.create(name, true,
+            fileSys.getConf().getInt("io.file.buffer.size", 4096),
+            (short)repl, (long)blockSize);
     byte[] buffer = new byte[fileSize];
     Random rand = new Random(seed);
     rand.nextBytes(buffer);

+ 10 - 7
src/test/org/apache/hadoop/dfs/TestDFSShell.java

@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.StringUtils;
 
 
 /**
@@ -46,7 +47,10 @@ public class TestDFSShell extends TestCase {
   public void testDFSShell() throws IOException {
     Configuration conf = new Configuration();
     MiniDFSCluster cluster = new MiniDFSCluster(65312, conf, 2, false);
-    FileSystem fileSys = cluster.getFileSystem();
+    FileSystem fs = cluster.getFileSystem();
+    assertTrue("Not a HDFS: "+fs.getUri(),
+            fs instanceof DistributedFileSystem);
+    DistributedFileSystem fileSys = (DistributedFileSystem)fs;
     FsShell shell = new FsShell();
     shell.setConf(conf);
 
@@ -60,6 +64,7 @@ public class TestDFSShell extends TestCase {
     	// Second, create a file in that directory.
     	Path myFile = new Path("/test/mkdirs/myFile");
     	writeFile(fileSys, myFile);
+        assertTrue(fileSys.exists(myFile));
 
         // Verify that we can read the file
         {
@@ -70,20 +75,19 @@ public class TestDFSShell extends TestCase {
           try {
             val = shell.run(args);
             } catch (Exception e) {
-            System.err.println("Exception raised from DFSShell.run " +
-                               e.getLocalizedMessage()); 
+            System.err.println("Exception raised from DFSShell.run: " +
+                               StringUtils.stringifyException(e)); 
           }
           assertTrue(val == 0);
         }
 
         // Verify that we can get with and without crc
         {
-          File testFile = new File(TEST_ROOT_DIR, "mkdirs/myFile");
+          File testFile = new File(TEST_ROOT_DIR, "myFile");
           File checksumFile = new File(fileSys.getChecksumFile(
               new Path(testFile.getAbsolutePath())).toString());
           testFile.delete();
           checksumFile.delete();
-          new File(TEST_ROOT_DIR, "mkdirs").delete();
           
           String[] args = new String[3];
           args[0] = "-get";
@@ -102,12 +106,11 @@ public class TestDFSShell extends TestCase {
           testFile.delete();
         }
         {
-          File testFile = new File(TEST_ROOT_DIR, "mkdirs/myFile");
+          File testFile = new File(TEST_ROOT_DIR, "myFile");
           File checksumFile = new File(fileSys.getChecksumFile(
               new Path(testFile.getAbsolutePath())).toString());
           testFile.delete();
           checksumFile.delete();
-          new File(TEST_ROOT_DIR, "mkdirs").delete();
           
           String[] args = new String[4];
           args[0] = "-get";

+ 8 - 6
src/test/org/apache/hadoop/dfs/TestDecommission.java

@@ -22,8 +22,6 @@ import java.io.*;
 import java.util.Random;
 import java.net.*;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -59,8 +57,9 @@ public class TestDecommission extends TestCase {
   private void writeFile(FileSystem fileSys, Path name, int repl)
   throws IOException {
     // create and write a file that contains three blocks of data
-    FSOutputStream stm = fileSys.createRaw(name, true, (short)repl,
-        (long)blockSize);
+    FSDataOutputStream stm = fileSys.create(name, true, 
+            fileSys.getConf().getInt("io.file.buffer.size", 4096),
+            (short)repl, (long)blockSize);
     byte[] buffer = new byte[fileSize];
     Random rand = new Random(seed);
     rand.nextBytes(buffer);
@@ -88,8 +87,11 @@ public class TestDecommission extends TestCase {
     // sleep an additional 10 seconds for the blockreports from the datanodes
     // to arrive. 
     //
-    FSInputStream is = fileSys.openRaw(name);
-    DFSClient.DFSInputStream dis = (DFSClient.DFSInputStream) is;
+    // need a raw stream
+    assertTrue("Not HDFS:"+fileSys.getUri(), fileSys instanceof DistributedFileSystem);
+        
+    DFSClient.DFSDataInputStream dis = (DFSClient.DFSDataInputStream) 
+        ((DistributedFileSystem)fileSys).getRawFileSystem().open(name);
     DatanodeInfo[][] dinfo = dis.getDataNodes();
 
     for (int blk = 0; blk < dinfo.length; blk++) { // for each block

+ 4 - 3
src/test/org/apache/hadoop/dfs/TestReplication.java

@@ -25,7 +25,7 @@ import java.net.*;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSOutputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.ipc.RPC;
@@ -51,8 +51,9 @@ public class TestReplication extends TestCase {
   private void writeFile(FileSystem fileSys, Path name, int repl)
   throws IOException {
     // create and write a file that contains three blocks of data
-    FSOutputStream stm = fileSys.createRaw(name, true, (short)repl,
-        (long)blockSize);
+    FSDataOutputStream stm = fileSys.create(name, true,
+            fileSys.getConf().getInt("io.file.buffer.size", 4096),
+            (short)repl, (long)blockSize);
     byte[] buffer = new byte[fileSize];
     Random rand = new Random(seed);
     rand.nextBytes(buffer);

+ 7 - 5
src/test/org/apache/hadoop/dfs/TestSeekBug.java

@@ -17,14 +17,13 @@
  */
 package org.apache.hadoop.dfs;
 
-import javax.swing.filechooser.FileSystemView;
 import junit.framework.TestCase;
 import java.io.*;
 import java.util.Random;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ChecksumFileSystem;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
@@ -57,8 +56,7 @@ public class TestSeekBug extends TestCase {
   }
   
   private void seekReadFile(FileSystem fileSys, Path name) throws IOException {
-    FSInputStream stmRaw = fileSys.openRaw(name);
-    FSDataInputStream stm = new FSDataInputStream(stmRaw, 4096);
+    FSDataInputStream stm = fileSys.open(name, 4096);
     byte[] expected = new byte[ONEMB];
     Random rand = new Random(seed);
     rand.nextBytes(expected);
@@ -83,7 +81,11 @@ public class TestSeekBug extends TestCase {
    * Read some data, skip a few bytes and read more. HADOOP-922.
    */
   private void smallReadSeek(FileSystem fileSys, Path name) throws IOException {
-    FSInputStream stmRaw = fileSys.openRaw(name);
+    if (fileSys instanceof ChecksumFileSystem) {
+        fileSys = ((ChecksumFileSystem)fileSys).getRawFileSystem();
+    }
+    // Make the buffer size small to trigger code for HADOOP-922
+    FSDataInputStream stmRaw = fileSys.open(name, 1);
     byte[] expected = new byte[ONEMB];
     Random rand = new Random(seed);
     rand.nextBytes(expected);

+ 6 - 5
src/test/org/apache/hadoop/dfs/TestSmallBlock.java

@@ -21,8 +21,8 @@ import junit.framework.TestCase;
 import java.io.*;
 import java.util.Random;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
@@ -38,8 +38,9 @@ public class TestSmallBlock extends TestCase {
 
   private void writeFile(FileSystem fileSys, Path name) throws IOException {
     // create and write a file that contains three blocks of data
-    FSOutputStream stm = fileSys.createRaw(name, true, (short)1,
-        (long)blockSize);
+    FSDataOutputStream stm = fileSys.create(name, true, 
+            fileSys.getConf().getInt("io.file.buffer.size", 4096),
+            (short)1, (long)blockSize);
     byte[] buffer = new byte[fileSize];
     Random rand = new Random(seed);
     rand.nextBytes(buffer);
@@ -59,7 +60,7 @@ public class TestSmallBlock extends TestCase {
   private void checkFile(FileSystem fileSys, Path name) throws IOException {
     String[][] locations = fileSys.getFileCacheHints(name, 0, fileSize);
     assertEquals("Number of blocks", fileSize, locations.length);
-    FSInputStream stm = fileSys.openRaw(name);
+    FSDataInputStream stm = fileSys.open(name);
     byte[] expected = new byte[fileSize];
     Random rand = new Random(seed);
     rand.nextBytes(expected);

+ 1 - 1
src/test/org/apache/hadoop/fs/TestLocalFileSystem.java

@@ -83,7 +83,7 @@ public class TestLocalFileSystem extends TestCase {
       fileSys.rename(file2, file1);
       
       // try reading a file
-      InputStream stm = fileSys.openRaw(file1);
+      InputStream stm = fileSys.open(file1);
       byte[] buffer = new byte[3];
       int bytesRead = stm.read(buffer, 0, 3);
       assertEquals("42\n", new String(buffer, 0, bytesRead));

+ 21 - 13
src/test/org/apache/hadoop/fs/s3/S3FileSystemBaseTest.java

@@ -6,8 +6,8 @@ import java.net.URI;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FSOutputStream;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 
 public abstract class S3FileSystemBaseTest extends TestCase {
@@ -84,28 +84,28 @@ public abstract class S3FileSystemBaseTest extends TestCase {
   public void testListPathsRaw() throws Exception {
     Path[] testDirs = { new Path("/test/hadoop/a"), new Path("/test/hadoop/b"),
         new Path("/test/hadoop/c/1"), };
-    assertNull(s3FileSystem.listPathsRaw(testDirs[0]));
+    assertNull(s3FileSystem.listPaths(testDirs[0]));
 
     for (Path path : testDirs) {
       assertTrue(s3FileSystem.mkdirs(path));
     }
 
-    Path[] paths = s3FileSystem.listPathsRaw(new Path("/"));
+    Path[] paths = s3FileSystem.listPaths(new Path("/"));
 
     assertEquals(1, paths.length);
     assertEquals(new Path("/test"), paths[0]);
 
-    paths = s3FileSystem.listPathsRaw(new Path("/test"));
+    paths = s3FileSystem.listPaths(new Path("/test"));
     assertEquals(1, paths.length);
     assertEquals(new Path("/test/hadoop"), paths[0]);
 
-    paths = s3FileSystem.listPathsRaw(new Path("/test/hadoop"));
+    paths = s3FileSystem.listPaths(new Path("/test/hadoop"));
     assertEquals(3, paths.length);
     assertEquals(new Path("/test/hadoop/a"), paths[0]);
     assertEquals(new Path("/test/hadoop/b"), paths[1]);
     assertEquals(new Path("/test/hadoop/c"), paths[2]);
 
-    paths = s3FileSystem.listPathsRaw(new Path("/test/hadoop/a"));
+    paths = s3FileSystem.listPaths(new Path("/test/hadoop/a"));
     assertEquals(0, paths.length);
   }
 
@@ -135,7 +135,9 @@ public abstract class S3FileSystemBaseTest extends TestCase {
     
     s3FileSystem.mkdirs(path.getParent());
 
-    FSOutputStream out = s3FileSystem.createRaw(path, false, (short) 1, BLOCK_SIZE);
+    FSDataOutputStream out = s3FileSystem.create(path, false,
+            s3FileSystem.getConf().getInt("io.file.buffer.size", 4096), 
+            (short) 1, BLOCK_SIZE);
     out.write(data, 0, len);
     out.close();
 
@@ -145,7 +147,7 @@ public abstract class S3FileSystemBaseTest extends TestCase {
 
     assertEquals("Length", len, s3FileSystem.getLength(path));
 
-    FSInputStream in = s3FileSystem.openRaw(path);
+    FSDataInputStream in = s3FileSystem.open(path);
     byte[] buf = new byte[len];
 
     in.readFully(0, buf);
@@ -155,7 +157,7 @@ public abstract class S3FileSystemBaseTest extends TestCase {
       assertEquals("Position " + i, data[i], buf[i]);
     }
     
-    assertTrue("Deleted", s3FileSystem.deleteRaw(path));
+    assertTrue("Deleted", s3FileSystem.delete(path));
     
     assertFalse("No longer exists", s3FileSystem.exists(path));
 
@@ -172,13 +174,17 @@ public abstract class S3FileSystemBaseTest extends TestCase {
     assertEquals("Length", BLOCK_SIZE, s3FileSystem.getLength(path));
     
     try {
-      s3FileSystem.createRaw(path, false, (short) 1, 128);
+      s3FileSystem.create(path, false,
+              s3FileSystem.getConf().getInt("io.file.buffer.size", 4096),
+              (short) 1, 128);
       fail("Should throw IOException.");
     } catch (IOException e) {
       // Expected
     }
     
-    FSOutputStream out = s3FileSystem.createRaw(path, true, (short) 1, BLOCK_SIZE);
+    FSDataOutputStream out = s3FileSystem.create(path, true,
+            s3FileSystem.getConf().getInt("io.file.buffer.size", 4096), 
+            (short) 1, BLOCK_SIZE);
     out.write(data, 0, BLOCK_SIZE / 2);
     out.close();
     
@@ -321,7 +327,9 @@ public abstract class S3FileSystemBaseTest extends TestCase {
   }
 
   private void createEmptyFile(Path path) throws IOException {
-    FSOutputStream out = s3FileSystem.createRaw(path, false, (short) 1, BLOCK_SIZE);
+    FSDataOutputStream out = s3FileSystem.create(path, false,
+            s3FileSystem.getConf().getInt("io.file.buffer.size", 4096),
+            (short) 1, BLOCK_SIZE);
     out.write(data, 0, BLOCK_SIZE);
     out.close();
   }