Browse Source

HDFS-1700. Federation: fsck needs to work with federation changes. Contributed by Matt Foley.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/branches/HDFS-1052@1078096 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas 14 years ago
parent
commit
3fc017f5e6

+ 3 - 0
CHANGES.txt

@@ -206,6 +206,9 @@ Trunk (unreleased changes)
     HDFS-1720. Federation: FSVolumeSet volumes is not synchronized correctly.
     HDFS-1720. Federation: FSVolumeSet volumes is not synchronized correctly.
     (suresh)
     (suresh)
 
 
+    HDFS-1700. Federation: fsck needs to work with federation changes.
+    (Matt Foley via suresh)
+
   IMPROVEMENTS
   IMPROVEMENTS
 
 
     HDFS-1510. Added test-patch.properties required by test-patch.sh (nigel)
     HDFS-1510. Added test-patch.properties required by test-patch.sh (nigel)

+ 79 - 0
src/java/org/apache/hadoop/hdfs/DFSUtil.java

@@ -391,6 +391,85 @@ public class DFSUtil {
     return addressList;
     return addressList;
   }
   }
   
   
+  /**
+   * Reverse-maps a namenode address to the nameservice ID it is configured
+   * under.
+   * <p>
+   * For every nameservice ID returned by {@link #getNameServiceIds}, and for
+   * every generic key supplied, the nameservice-specific configuration value
+   * is read, turned into an InetSocketAddress, and compared with
+   * <code>address</code>.  The first nameservice ID that produces an equal
+   * address is returned.
+   * <p>
+   * Socket addresses are compared, rather than the configured strings,
+   * because the caller's address may originate from a URI typed by a human
+   * or from an aliased server name.  Converting each configured value to an
+   * address is slightly expensive, so avoid calling this method from
+   * performance-critical code.
+   * <p>
+   * A null result means no nameservice matched (or none are configured); the
+   * caller should then try {@link #isDefaultNamenodeAddress} to cover
+   * pre-Federation configurations.
+   * 
+   * @param conf - configuration
+   * @param address - InetSocketAddress used to communicate with the NN
+   * @param keys - generic (not NameServiceId-suffixed) configuration keys to
+   *     check, for example DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY and
+   *     DFS_NAMENODE_RPC_ADDRESS_KEY when matching RPC addresses
+   * @return the matching nameserviceId, or null if no match found
+   */
+  public static String getNameServiceIdFromAddress(Configuration conf, 
+      InetSocketAddress address, String... keys) {
+    String matchedId = null;
+    Collection<String> configuredIds = getNameServiceIds(conf);
+
+    // A null or empty collection means a single namenode with no
+    // nameserviceId; nothing can match, so the search is skipped entirely.
+    if (configuredIds != null) {
+      search:
+      for (String id : configuredIds) {
+        for (String genericKey : keys) {
+          String configured = conf.get(getNameServiceIdKey(genericKey, id));
+          if (configured == null) {
+            continue;
+          }
+          InetSocketAddress candidate = NetUtils.createSocketAddr(configured);
+          if (address.equals(candidate)) {
+            matchedId = id;
+            break search;
+          }
+        }
+      }
+    }
+    // When still null, client should try isDefaultNamenodeAddress instead
+    return matchedId;
+  }
+  
+  /**
+   * Tells whether the given address is one of the addresses configured for
+   * the "default" namenode, i.e. under the generic (un-suffixed) keys.
+   * <p>
+   * Each supplied key is looked up in the configuration as-is; every value
+   * that is present is converted to an InetSocketAddress and compared with
+   * <code>address</code>.  The search stops at the first equal address.
+   * <p>
+   * Socket addresses are compared, rather than the configured strings,
+   * because the caller's address may originate from a URI typed by a human
+   * or from an aliased server name.  Converting each configured value to an
+   * address is slightly expensive, so avoid calling this method from
+   * performance-critical code.
+   * 
+   * @param conf - configuration
+   * @param address - InetSocketAddress used to communicate with the NN
+   * @param keys - generic configuration keys to check, for example
+   *     DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY and DFS_NAMENODE_RPC_ADDRESS_KEY
+   *     when matching RPC addresses
+   * @return true if the value of any of the keys resolves to
+   *     <code>address</code>, false otherwise
+   */
+  public static boolean isDefaultNamenodeAddress(Configuration conf,
+      InetSocketAddress address, String... keys) {
+    for (String genericKey : keys) {
+      String configured = conf.get(genericKey);
+      if (configured == null) {
+        // key not set in this configuration; nothing to compare
+        continue;
+      }
+      InetSocketAddress candidate = NetUtils.createSocketAddr(configured);
+      if (address.equals(candidate)) {
+        return true;
+      }
+    }
+    return false;
+  }
+  
   /**
   /**
    * @return key specific to a nameserviceId from a generic key
    * @return key specific to a nameserviceId from a generic key
    */
    */

+ 82 - 3
src/java/org/apache/hadoop/hdfs/tools/DFSck.java

@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.InputStreamReader;
 import java.io.PrintStream;
 import java.io.PrintStream;
+import java.net.InetSocketAddress;
 import java.net.URL;
 import java.net.URL;
 import java.net.URLConnection;
 import java.net.URLConnection;
 import java.net.URLEncoder;
 import java.net.URLEncoder;
@@ -30,12 +31,17 @@ import java.security.PrivilegedExceptionAction;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
 import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.security.Krb5AndCertsSslSocketConnector;
 import org.apache.hadoop.security.Krb5AndCertsSslSocketConnector;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.hadoop.util.ToolRunner;
 
 
@@ -192,7 +198,72 @@ public class DFSck extends Configured implements Tool {
       errCode = 0;
       errCode = 0;
     return errCode;
     return errCode;
   }
   }
-            
+  
+  /**
+   * Derive the namenode http address from the current file system,
+   * either default or as set by "-fs" in the generic options.
+   * <p>
+   * The lookup proceeds in two steps.  First the file system URI is mapped
+   * to a nameservice ID and the nameservice-specific http(s) address is read
+   * from the configuration.  If that yields nothing, the URI is checked
+   * against the generic RPC address keys and, on a match, the generic info
+   * server address is used.  Every failure path prints an explanation to
+   * stderr before returning.
+   *
+   * @return the namenode http address (https key when security is enabled),
+   *     or null if it could not be determined
+   */
+  private String getCurrentNamenodeAddress() {
+    Configuration conf = getConf();
+
+    //get the filesystem object
+    FileSystem fs;
+    try {
+      fs = FileSystem.get(conf);
+    } catch (IOException ioe) {
+      System.err.println("FileSystem is inaccessible due to:\n"
+          + StringUtils.stringifyException(ioe));
+      return null;
+    }
+    if (!(fs instanceof DistributedFileSystem)) {
+      // Say why we are giving up, not merely which file system was found.
+      System.err.println("FileSystem is " + fs.getUri()
+          + ", which is not an HDFS file system; fsck requires HDFS.");
+      return null;
+    }
+
+    // Derive the nameservice ID from the filesystem URI.
+    // The URI may have been provided by a human, and the server name may be
+    // aliased, so compare InetSocketAddresses instead of URI strings, and
+    // test against both possible variants of RPC address.
+    InetSocketAddress namenode =
+        NameNode.getAddress(fs.getUri().getAuthority());
+    String nameServiceId = DFSUtil.getNameServiceIdFromAddress(
+        conf, namenode,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+
+    //Look up nameservice-specific http address
+    String httpAddressKey = UserGroupInformation.isSecurityEnabled() ?
+        DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY
+        : DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY;
+    if (nameServiceId != null) {
+      String nameServiceKey =
+          DFSUtil.getNameServiceIdKey(httpAddressKey, nameServiceId);
+      String nnAddress = conf.get(nameServiceKey);
+      if (nnAddress != null) {
+        return nnAddress;
+      }
+      System.err.println("Nameservice value for "
+          + nameServiceKey
+          + " expected but not available.  Trying generic value.");
+      //and fall through to next block
+    }
+
+    // If that didn't work, check to see whether the filesystem URI addresses 
+    // the default namenode.  If so, look up generic http address.
+    boolean isDefaultNamenode = DFSUtil.isDefaultNamenodeAddress(
+        conf, namenode,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+    if (!isDefaultNamenode) {
+      System.err.println("Cannot derive an unambiguous value for "
+          + httpAddressKey + " from FileSystem " + fs.getUri());
+      return null;
+    }
+    return NameNode.getInfoServer(conf);
+  }
+
   private int doWork(final String[] args) throws IOException {
   private int doWork(final String[] args) throws IOException {
     String proto = "http://";
     String proto = "http://";
     if (UserGroupInformation.isSecurityEnabled()) {
     if (UserGroupInformation.isSecurityEnabled()) {
@@ -201,9 +272,17 @@ public class DFSck extends Configured implements Tool {
       proto = "https://";
       proto = "https://";
     }
     }
     final StringBuilder url = new StringBuilder(proto);
     final StringBuilder url = new StringBuilder(proto);
-    url.append(NameNode.getInfoServer(getConf()));
+    
+    String namenodeAddress = getCurrentNamenodeAddress();
+    if (namenodeAddress == null) {
+      //Error message already output in {@link #getCurrentNamenodeAddress()}
+      System.err.println("DFSck exiting.");
+      return 0;
+    }
+    url.append(namenodeAddress);
+    System.err.println("Connecting to namenode via " + url.toString());
+    
     url.append("/fsck?ugi=").append(ugi.getShortUserName()).append("&path=");
     url.append("/fsck?ugi=").append(ugi.getShortUserName()).append("&path=");
-
     String dir = "/";
     String dir = "/";
     // find top-level dir first
     // find top-level dir first
     for (int idx = 0; idx < args.length; idx++) {
     for (int idx = 0; idx < args.length; idx++) {

+ 57 - 5
src/test/hdfs/org/apache/hadoop/hdfs/TestDFSUtil.java

@@ -18,16 +18,21 @@
 
 
 package org.apache.hadoop.hdfs;
 package org.apache.hadoop.hdfs;
 
 
-import static org.junit.Assert.assertEquals;
-
 import java.io.IOException;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.InetSocketAddress;
+import java.net.URI;
 import java.util.Collection;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.Iterator;
 import java.util.List;
 import java.util.List;
 
 
+import junit.framework.Assert;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
-import org.junit.Assert;
+import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.net.NetUtils;
+
+import static org.junit.Assert.*;
 import org.junit.Test;
 import org.junit.Test;
 
 
 
 
@@ -38,6 +43,7 @@ public class TestDFSUtil {
    * {@link DFSUtil#getNameServiceIds(Configuration)}
    * {@link DFSUtil#getNameServiceIds(Configuration)}
    * {@link DFSUtil#getNameServiceId(Configuration)}
    * {@link DFSUtil#getNameServiceId(Configuration)}
    * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)}
    * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)}
+   * {@link DFSUtil#getNameServiceIdFromAddress(Configuration, InetSocketAddress, String...)}
    */
    */
   @Test
   @Test
   public void testMultipleNamenodes() throws IOException {
   public void testMultipleNamenodes() throws IOException {
@@ -56,10 +62,13 @@ public class TestDFSUtil {
     assertEquals("nn1", DFSUtil.getNameServiceId(conf));
     assertEquals("nn1", DFSUtil.getNameServiceId(conf));
     
     
     // Test - configured list of namenodes are returned
     // Test - configured list of namenodes are returned
+    final String NN1_ADDRESS = "localhost:9000";
+    final String NN2_ADDRESS = "localhost:9001";
+    final String NN3_ADDRESS = "localhost:9002";
     conf.set(DFSUtil.getNameServiceIdKey(
     conf.set(DFSUtil.getNameServiceIdKey(
-        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), "localhost:9000");
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), NN1_ADDRESS);
     conf.set(DFSUtil.getNameServiceIdKey(
     conf.set(DFSUtil.getNameServiceIdKey(
-        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), "localhost:9001");
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), NN2_ADDRESS);
     
     
     Collection<InetSocketAddress> nnAddresses = 
     Collection<InetSocketAddress> nnAddresses = 
       DFSUtil.getNNServiceRpcAddresses(conf);
       DFSUtil.getNNServiceRpcAddresses(conf);
@@ -72,6 +81,49 @@ public class TestDFSUtil {
     addr = iterator.next();
     addr = iterator.next();
     assertEquals("localhost", addr.getHostName());
     assertEquals("localhost", addr.getHostName());
     assertEquals(9001, addr.getPort());
     assertEquals(9001, addr.getPort());
+    
+    // Test - can look up nameservice ID from service address
+    InetSocketAddress testAddress1 = NetUtils.createSocketAddr(NN1_ADDRESS);
+    String nameserviceId = DFSUtil.getNameServiceIdFromAddress(
+        conf, testAddress1,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+    assertEquals("nn1", nameserviceId);
+    InetSocketAddress testAddress2 = NetUtils.createSocketAddr(NN2_ADDRESS);
+    nameserviceId = DFSUtil.getNameServiceIdFromAddress(
+        conf, testAddress2,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+    assertEquals("nn2", nameserviceId);
+    InetSocketAddress testAddress3 = NetUtils.createSocketAddr(NN3_ADDRESS);
+    nameserviceId = DFSUtil.getNameServiceIdFromAddress(
+        conf, testAddress3,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+    assertNull(nameserviceId);
+  }
+  
+  /** 
+   * Test for
+   * {@link DFSUtil#isDefaultNamenodeAddress(Configuration, InetSocketAddress, String...)}
+   */
+  @Test
+  public void testSingleNamenode() {
+    HdfsConfiguration conf = new HdfsConfiguration();
+    final String DEFAULT_ADDRESS = "localhost:9000";
+    final String NN2_ADDRESS = "localhost:9001";
+    conf.set(DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS);
+    
+    InetSocketAddress testAddress1 = NetUtils.createSocketAddr(DEFAULT_ADDRESS);
+    boolean isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress1,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+    assertTrue(isDefault);
+    InetSocketAddress testAddress2 = NetUtils.createSocketAddr(NN2_ADDRESS);
+    isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress2,
+        DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
+    assertFalse(isDefault);
   }
   }
   
   
   /** Tests to ensure default namenode is used as fallback */
   /** Tests to ensure default namenode is used as fallback */