Browse Source

HADOOP-4565. Improved the unit test for CombineFileInputFormat. (dhruba)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@742702 13f79535-47bb-0310-9956-ffa450edef68
Dhruba Borthakur 16 years ago
parent
commit
862f4ae28e

+ 15 - 13
src/mapred/org/apache/hadoop/mapred/lib/CombineFileSplit.java

@@ -179,25 +179,27 @@ public class CombineFileSplit implements InputSplit {
   }
   }
   
   
   @Override
   @Override
-  public String toString() {
-    String locs = "";
-    StringBuffer locsb = new StringBuffer();
-    if (locations != null) {
-      for (int i = 0; i < locations.length; i++) {
-        locsb.append(locations[i] + ":");
-      }
-      locs = locsb.toString();
-    }
+ public String toString() {
     StringBuffer sb = new StringBuffer();
     StringBuffer sb = new StringBuffer();
     for (int i = 0; i < paths.length; i++) {
     for (int i = 0; i < paths.length; i++) {
+      if (i == 0 ) {
+        sb.append("Paths:");
+      }
       sb.append(paths[i].toUri().getPath() + ":" + startoffset[i] +
       sb.append(paths[i].toUri().getPath() + ":" + startoffset[i] +
-                "+" + lengths[i] +
-                ":" + locs
-                );
+                "+" + lengths[i]);
       if (i < paths.length -1) {
       if (i < paths.length -1) {
-        sb.append(", ");
+        sb.append(",");
       }
       }
     }
     }
+    if (locations != null) {
+      String locs = "";
+      StringBuffer locsb = new StringBuffer();
+      for (int i = 0; i < locations.length; i++) {
+        locsb.append(locations[i] + ":");
+      }
+      locs = locsb.toString();
+      sb.append(" Locations:" + locs + "; ");
+    }
     return sb.toString();
     return sb.toString();
   }
   }
 }
 }

+ 254 - 18
src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
@@ -118,12 +119,14 @@ public class TestCombineFileInputFormat extends TestCase{
       if (!fileSys.mkdirs(inDir)) {
       if (!fileSys.mkdirs(inDir)) {
         throw new IOException("Mkdirs failed to create " + inDir.toString());
         throw new IOException("Mkdirs failed to create " + inDir.toString());
       }
       }
-      writeFile(conf, new Path(dir1 + "/file1"), (short)1, 1);
+      Path file1 = new Path(dir1 + "/file1");
+      writeFile(conf, file1, (short)1, 1);
       dfs.startDataNodes(conf, 1, true, null, rack2, hosts2, null);
       dfs.startDataNodes(conf, 1, true, null, rack2, hosts2, null);
       dfs.waitActive();
       dfs.waitActive();
 
 
       // create file on two datanodes.
       // create file on two datanodes.
-      writeFile(conf, new Path(dir2 + "/file2"), (short)2, 2);
+      Path file2 = new Path(dir2 + "/file2");
+      writeFile(conf, file2, (short)2, 2);
 
 
       // split it using a CombinedFile input format
       // split it using a CombinedFile input format
       DummyInputFormat inFormat = new DummyInputFormat();
       DummyInputFormat inFormat = new DummyInputFormat();
@@ -133,37 +136,117 @@ public class TestCombineFileInputFormat extends TestCase{
       System.out.println("Made splits(Test1): " + splits.length);
       System.out.println("Made splits(Test1): " + splits.length);
 
 
       // make sure that each split has different locations
       // make sure that each split has different locations
+      CombineFileSplit fileSplit = null;
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test1): " + fileSplit);
         System.out.println("File split(Test1): " + fileSplit);
       }
       }
       assertEquals(splits.length, 2);
       assertEquals(splits.length, 2);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r2");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getNumPaths(), 1);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file1.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r1");
 
 
       // create another file on 3 datanodes and 3 racks.
       // create another file on 3 datanodes and 3 racks.
       dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
       dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
       dfs.waitActive();
       dfs.waitActive();
+      Path file3 = new Path(dir3 + "/file3");
       writeFile(conf, new Path(dir3 + "/file3"), (short)3, 3);
       writeFile(conf, new Path(dir3 + "/file3"), (short)3, 3);
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3);
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3);
       inFormat.setMinSplitSizeRack(BLOCKSIZE);
       inFormat.setMinSplitSizeRack(BLOCKSIZE);
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test2): " + fileSplit);
         System.out.println("File split(Test2): " + fileSplit);
       }
       }
       assertEquals(splits.length, 3);
       assertEquals(splits.length, 3);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 3);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(2).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(2), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r3");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r2");
+      fileSplit = (CombineFileSplit) splits[2];
+      assertEquals(fileSplit.getNumPaths(), 1);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file1.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r1");
 
 
       // create file4 on all three racks
       // create file4 on all three racks
-      writeFile(conf, new Path(dir4 + "/file4"), (short)3, 3);
+      Path file4 = new Path(dir4 + "/file4");
+      writeFile(conf, file4, (short)3, 3);
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       inFormat.setMinSplitSizeRack(BLOCKSIZE);
       inFormat.setMinSplitSizeRack(BLOCKSIZE);
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test3): " + fileSplit);
         System.out.println("File split(Test3): " + fileSplit);
       }
       }
       assertEquals(splits.length, 3);
       assertEquals(splits.length, 3);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 6);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(2).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(2), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r3");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r2");
+      fileSplit = (CombineFileSplit) splits[2];
+      assertEquals(fileSplit.getNumPaths(), 1);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file1.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r1");
 
 
       // maximum split size is 2 blocks 
       // maximum split size is 2 blocks 
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -172,10 +255,38 @@ public class TestCombineFileInputFormat extends TestCase{
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test4): " + fileSplit);
         System.out.println("File split(Test4): " + fileSplit);
       }
       }
       assertEquals(splits.length, 5);
       assertEquals(splits.length, 5);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getPath(0).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(0), 2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(1), 0);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
+      fileSplit = (CombineFileSplit) splits[2];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(0), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(1), 2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
 
 
       // maximum split size is 3 blocks 
       // maximum split size is 3 blocks 
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -184,10 +295,51 @@ public class TestCombineFileInputFormat extends TestCase{
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test5): " + fileSplit);
         System.out.println("File split(Test5): " + fileSplit);
       }
       }
       assertEquals(splits.length, 4);
       assertEquals(splits.length, 4);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 3);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(2).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(2), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getPath(0).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(2).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(2),  2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(2), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
+      fileSplit = (CombineFileSplit) splits[2];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host2.rack2.com");
+      fileSplit = (CombineFileSplit) splits[3];
+      assertEquals(fileSplit.getNumPaths(), 1);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file1.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host1.rack1.com");
 
 
       // maximum split size is 4 blocks 
       // maximum split size is 4 blocks 
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -195,10 +347,45 @@ public class TestCombineFileInputFormat extends TestCase{
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test6): " + fileSplit);
         System.out.println("File split(Test6): " + fileSplit);
       }
       }
       assertEquals(splits.length, 3);
       assertEquals(splits.length, 3);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 4);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(2).getName(), file3.getName());
+      assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(2), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getNumPaths(), 4);
+      assertEquals(fileSplit.getPath(0).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(1).getName(), file2.getName());
+      assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(1), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(2).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(2), BLOCKSIZE);
+      assertEquals(fileSplit.getLength(2), BLOCKSIZE);
+      assertEquals(fileSplit.getPath(3).getName(), file4.getName());
+      assertEquals(fileSplit.getOffset(3),  2 * BLOCKSIZE);
+      assertEquals(fileSplit.getLength(3), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "host2.rack2.com");
+      fileSplit = (CombineFileSplit) splits[2];
+      assertEquals(fileSplit.getNumPaths(), 1);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getPath(0).getName(), file1.getName());
+      assertEquals(fileSplit.getOffset(0), 0);
+      assertEquals(fileSplit.getLength(0), BLOCKSIZE);
+      assertEquals(fileSplit.getLocations()[0], "/r1");
 
 
       // maximum split size is 7 blocks and min is 3 blocks
       // maximum split size is 7 blocks and min is 3 blocks
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -208,22 +395,26 @@ public class TestCombineFileInputFormat extends TestCase{
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       inFormat.setInputPaths(conf, dir1 + "," + dir2 + "," + dir3 + "," + dir4);
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(Test7): " + fileSplit);
         System.out.println("File split(Test7): " + fileSplit);
       }
       }
       assertEquals(splits.length, 2);
       assertEquals(splits.length, 2);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 6);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getLocations()[0], "host3.rack3.com");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getNumPaths(), 3);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getLocations()[0], "host1.rack1.com");
 
 
       // Rack 1 has file1, file2 and file3 and file4
       // Rack 1 has file1, file2 and file3 and file4
       // Rack 2 has file2 and file3 and file4
       // Rack 2 has file2 and file3 and file4
       // Rack 3 has file3 and file4
       // Rack 3 has file3 and file4
-      Path file1 = new Path(dir1 + "/file1");
       file1 = new Path(conf.getWorkingDirectory(), file1);
       file1 = new Path(conf.getWorkingDirectory(), file1);
-      Path file2 = new Path(dir2 + "/file2");
       file2 = new Path(conf.getWorkingDirectory(), file2);
       file2 = new Path(conf.getWorkingDirectory(), file2);
-      Path file3 = new Path(dir3 + "/file3");
       file3 = new Path(conf.getWorkingDirectory(), file3);
       file3 = new Path(conf.getWorkingDirectory(), file3);
-      Path file4 = new Path(dir4 + "/file4");
-      file4 = new Path(conf.getWorkingDirectory(), file3);
+      file4 = new Path(conf.getWorkingDirectory(), file4);
 
 
       // setup a filter so that only file1 and file2 can be combined
       // setup a filter so that only file1 and file2 can be combined
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -233,10 +424,22 @@ public class TestCombineFileInputFormat extends TestCase{
                           new TestFilter(dir2));
                           new TestFilter(dir2));
       splits = inFormat.getSplits(conf, 1);
       splits = inFormat.getSplits(conf, 1);
       for (int i = 0; i < splits.length; ++i) {
       for (int i = 0; i < splits.length; ++i) {
-        CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+        fileSplit = (CombineFileSplit) splits[i];
         System.out.println("File split(TestPool1): " + fileSplit);
         System.out.println("File split(TestPool1): " + fileSplit);
       }
       }
       assertEquals(splits.length, 3);
       assertEquals(splits.length, 3);
+      fileSplit = (CombineFileSplit) splits[0];
+      assertEquals(fileSplit.getNumPaths(), 2);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getLocations()[0], "/r2");
+      fileSplit = (CombineFileSplit) splits[1];
+      assertEquals(fileSplit.getNumPaths(), 1);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getLocations()[0], "/r1");
+      fileSplit = (CombineFileSplit) splits[2];
+      assertEquals(fileSplit.getNumPaths(), 6);
+      assertEquals(fileSplit.getLocations().length, 1);
+      assertEquals(fileSplit.getLocations()[0], "/r3");
     } finally {
     } finally {
       if (dfs != null) {
       if (dfs != null) {
         dfs.shutdown();
         dfs.shutdown();
@@ -280,8 +483,41 @@ public class TestCombineFileInputFormat extends TestCase{
     }
     }
   }
   }
 
 
+  /*
+   * Prints out the input splits for the specified files
+   */
+  private void splitRealFiles(String[] args) throws IOException {
+    JobConf conf = new JobConf();
+    FileSystem fs = FileSystem.get(conf);
+    if (!(fs instanceof DistributedFileSystem)) {
+      throw new IOException("Wrong file system: " + fs.getClass().getName());
+    }
+    int blockSize = conf.getInt("dfs.block.size", 128 * 1024 * 1024);
+
+    DummyInputFormat inFormat = new DummyInputFormat();
+    for (int i = 0; i < args.length; i++) {
+      inFormat.addInputPaths(conf, args[i]);
+    }
+    inFormat.setMinSplitSizeRack(blockSize);
+    inFormat.setMaxSplitSize(10 * blockSize);
+
+    InputSplit[] splits = inFormat.getSplits(conf, 1);
+    System.out.println("Total number of splits " + splits.length);
+    for (int i = 0; i < splits.length; ++i) {
+      CombineFileSplit fileSplit = (CombineFileSplit) splits[i];
+      System.out.println("Split[" + i + "] " + fileSplit);
+    }
+  }
+
   public static void main(String[] args) throws Exception{
   public static void main(String[] args) throws Exception{
-    TestCombineFileInputFormat test = new TestCombineFileInputFormat();
-    test.testSplitPlacement();
+
+    // if there are some parameters specified, then use those paths
+    if (args.length != 0) {
+      TestCombineFileInputFormat test = new TestCombineFileInputFormat();
+      test.splitRealFiles(args);
+    } else {
+      TestCombineFileInputFormat test = new TestCombineFileInputFormat();
+      test.testSplitPlacement();
+    }
   }
   }
 }
 }