Browse Source

HADOOP-5103. FileInputFormat now reuses the clusterMap network topology object, which significantly reduces the number of NetworkTopology.add log messages in the JobClient. Contributed by Jothi Padmanabhan.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@746925 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das 16 years ago
parent
commit
86813f49a0

+ 4 - 0
CHANGES.txt

@@ -208,6 +208,10 @@ Trunk (unreleased changes)
 
 
     HADOOP-5292. Fix NPE in KFS::getBlockLocations. (Sriram Rao via lohit)

+    HADOOP-5103. FileInputFormat now reuses the clusterMap network topology object
+    and that brings down the log messages in the JobClient to do with 
+    NetworkTopology.add significantly. (Jothi Padmanabhan via ddas)
+
 Release 0.20.0 - Unreleased

   INCOMPATIBLE CHANGES

+ 1 - 1
src/core/org/apache/hadoop/net/NetworkTopology.java

@@ -316,7 +316,6 @@ public class NetworkTopology {
      throw new IllegalArgumentException(
        "Not allow to add an inner node: "+NodeBase.getPath(node));
    }
-    LOG.info("Adding a new node: "+NodeBase.getPath(node));
    netlock.writeLock().lock();
    try {
      Node rack = getNode(node.getNetworkLocation());
@@ -326,6 +325,7 @@ public class NetworkTopology {
                                           + " at an illegal network location");
      }
      if (clusterMap.add(node)) {
+        LOG.info("Adding a new node: "+NodeBase.getPath(node));
        if (rack == null) {
          numOfRacks++;
        }

+ 10 - 4
src/mapred/org/apache/hadoop/mapred/FileInputFormat.java

@@ -214,6 +214,7 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
 
 
    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
+    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file: files) {
      Path path = file.getPath();
      FileSystem fs = path.getFileSystem(job);
@@ -226,7 +227,7 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
        long bytesRemaining = length;
        while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
          String[] splitHosts = getSplitHosts(blkLocations, 
-              length-bytesRemaining, splitSize);
+              length-bytesRemaining, splitSize, clusterMap);
          splits.add(new FileSplit(path, length-bytesRemaining, splitSize, 
              splitHosts));
          bytesRemaining -= splitSize;
@@ -237,7 +238,7 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
                     blkLocations[blkLocations.length-1].getHosts()));
        }
      } else if (length != 0) {
-        String[] splitHosts = getSplitHosts(blkLocations,0,length);
+        String[] splitHosts = getSplitHosts(blkLocations,0,length,clusterMap);
        splits.add(new FileSplit(path, 0, length, splitHosts));
      } else { 
        //Create empty hosts array for zero length files
@@ -417,7 +418,8 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
   * @throws IOException
   */
  protected String[] getSplitHosts(BlockLocation[] blkLocations, 
-      long offset, long splitSize)   throws IOException {
+      long offset, long splitSize, NetworkTopology clusterMap)
+  throws IOException {

    int startIndex = getBlockIndex(blkLocations, offset);

@@ -442,7 +444,6 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
    long bytesInLastBlock = bytesInThisBlock;
    int endIndex = index - 1;
    
-    NetworkTopology clusterMap = new NetworkTopology();
    Map <Node,NodeInfo> hostsMap = new IdentityHashMap<Node,NodeInfo>();
    Map <Node,NodeInfo> racksMap = new IdentityHashMap<Node,NodeInfo>();
    String [] allTopos = new String[0];
@@ -486,6 +487,11 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
        if (node == null) {
          node = new NodeBase(topo);
          clusterMap.add(node);
+        }
+        
+        nodeInfo = hostsMap.get(node);
+        
+        if (nodeInfo == null) {
          nodeInfo = new NodeInfo(node);
          hostsMap.put(node,nodeInfo);
          parentNode = node.getParent();

+ 5 - 2
src/test/org/apache/hadoop/mapred/TestGetSplitHosts.java

@@ -18,6 +18,8 @@
 package org.apache.hadoop.mapred;

 import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.net.NetworkTopology;
+
 import junit.framework.TestCase;

 public class TestGetSplitHosts extends TestCase {
@@ -28,6 +30,7 @@ public class TestGetSplitHosts extends TestCase {
     int block1Size = 100, block2Size = 150, block3Size = 75;
     int fileSize = block1Size + block2Size + block3Size;
     int replicationFactor = 3;
+    NetworkTopology clusterMap = new NetworkTopology();
    
     BlockLocation[] bs = new BlockLocation[numBlocks];
    
@@ -72,7 +75,7 @@ public class TestGetSplitHosts extends TestCase {
    
     SequenceFileInputFormat< String, String> sif = 
       new SequenceFileInputFormat<String,String>();
-    String [] hosts = sif.getSplitHosts(bs, 0, fileSize);
+    String [] hosts = sif.getSplitHosts(bs, 0, fileSize, clusterMap);

     // Contributions By Racks are
     // Rack1   175       
@@ -93,7 +96,7 @@ public class TestGetSplitHosts extends TestCase {
     bs[2] = new BlockLocation(block3Names,block3Hosts,block1Size+block2Size,
                                block3Size);

-    hosts = sif.getSplitHosts(bs, 0, fileSize);
+    hosts = sif.getSplitHosts(bs, 0, fileSize, clusterMap);
    
     // host1 makes the highest contribution among all hosts
     // So, that should be returned before others