
HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
Includes client authentication via user certificates and config-based
access control. (Kan Zhang via cdouglas)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@712305 13f79535-47bb-0310-9956-ffa450edef68

Christopher Douglas 16 years ago
parent
commit
73e58e78d2
50 changed files with 2923 additions and 14 deletions
  1. CHANGES.txt (+4 -0)
  2. src/contrib/build.xml (+1 -0)
  3. src/contrib/hdfsproxy/README (+30 -0)
  4. src/contrib/hdfsproxy/bin/hdfsproxy (+151 -0)
  5. src/contrib/hdfsproxy/bin/hdfsproxy-config.sh (+67 -0)
  6. src/contrib/hdfsproxy/bin/hdfsproxy-daemon.sh (+141 -0)
  7. src/contrib/hdfsproxy/bin/hdfsproxy-daemons.sh (+34 -0)
  8. src/contrib/hdfsproxy/bin/hdfsproxy-slaves.sh (+68 -0)
  9. src/contrib/hdfsproxy/bin/start-hdfsproxy.sh (+37 -0)
  10. src/contrib/hdfsproxy/bin/stop-hdfsproxy.sh (+28 -0)
  11. src/contrib/hdfsproxy/build.xml (+163 -0)
  12. src/contrib/hdfsproxy/conf/configuration.xsl (+24 -0)
  13. src/contrib/hdfsproxy/conf/hdfsproxy-default.xml (+59 -0)
  14. src/contrib/hdfsproxy/conf/hdfsproxy-env.sh (+44 -0)
  15. src/contrib/hdfsproxy/conf/hdfsproxy-env.sh.template (+44 -0)
  16. src/contrib/hdfsproxy/conf/hdfsproxy-hosts (+1 -0)
  17. src/contrib/hdfsproxy/conf/log4j.properties (+61 -0)
  18. src/contrib/hdfsproxy/conf/user-certs.xml (+26 -0)
  19. src/contrib/hdfsproxy/conf/user-permissions.xml (+28 -0)
  20. src/contrib/hdfsproxy/lib/commons-el.jar (BIN)
  21. src/contrib/hdfsproxy/lib/commons-logging-1.0.4.jar (BIN)
  22. src/contrib/hdfsproxy/lib/commons-logging-api-1.0.4.jar (BIN)
  23. src/contrib/hdfsproxy/lib/jasper-compiler.jar (BIN)
  24. src/contrib/hdfsproxy/lib/jasper-runtime.jar (BIN)
  25. src/contrib/hdfsproxy/lib/jetty-5.1.4.LICENSE.txt (+202 -0)
  26. src/contrib/hdfsproxy/lib/jetty-5.1.4.jar (BIN)
  27. src/contrib/hdfsproxy/lib/jsp-api.jar (BIN)
  28. src/contrib/hdfsproxy/lib/junit-3.8.1.LICENSE.txt (+100 -0)
  29. src/contrib/hdfsproxy/lib/junit-3.8.1.jar (BIN)
  30. src/contrib/hdfsproxy/lib/log4j-1.2.15.jar (BIN)
  31. src/contrib/hdfsproxy/lib/servlet-api.jar (BIN)
  32. src/contrib/hdfsproxy/lib/slf4j-LICENSE.txt (+24 -0)
  33. src/contrib/hdfsproxy/lib/slf4j-api-1.4.3.jar (BIN)
  34. src/contrib/hdfsproxy/lib/slf4j-log4j12-1.4.3.jar (BIN)
  35. src/contrib/hdfsproxy/lib/xmlenc-0.52.jar (BIN)
  36. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/HdfsProxy.java (+293 -0)
  37. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyFileDataServlet.java (+51 -0)
  38. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyFilter.java (+330 -0)
  39. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyHttpServer.java (+252 -0)
  40. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyListPathsServlet.java (+35 -0)
  41. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyStreamFile.java (+55 -0)
  42. src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyUgiManager.java (+152 -0)
  43. src/contrib/hdfsproxy/src/test/org/apache/hadoop/hdfsproxy/TestHdfsProxy.java (+262 -0)
  44. src/contrib/hdfsproxy/src/test/org/apache/hadoop/hdfsproxy/TestProxyUgiManager.java (+107 -0)
  45. src/hdfs/org/apache/hadoop/hdfs/HftpFileSystem.java (+19 -2)
  46. src/hdfs/org/apache/hadoop/hdfs/HsftpFileSystem.java (+5 -2)
  47. src/hdfs/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java (+3 -2)
  48. src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (+1 -0)
  49. src/hdfs/org/apache/hadoop/hdfs/server/namenode/FileDataServlet.java (+11 -4)
  50. src/hdfs/org/apache/hadoop/hdfs/server/namenode/StreamFile.java (+10 -4)

+ 4 - 0
CHANGES.txt

@@ -17,6 +17,10 @@ Trunk (unreleased changes)
 
   NEW FEATURES
 
+    HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
+    Includes client authentication via user certificates and config-based
+    access control. (Kan Zhang via cdouglas)
+
   IMPROVEMENTS
 
     HADOOP-4234. Fix KFS "glue" layer to allow applications to interface

+ 1 - 0
src/contrib/build.xml

@@ -46,6 +46,7 @@
   <!-- ====================================================== -->
   <target name="test">
     <subant target="test">
+      <fileset dir="." includes="hdfsproxy/build.xml"/>
       <fileset dir="." includes="streaming/build.xml"/>
       <fileset dir="." includes="fairscheduler/build.xml"/>
       <fileset dir="." includes="capacity-scheduler/build.xml"/>

+ 30 - 0
src/contrib/hdfsproxy/README

@@ -0,0 +1,30 @@
+HDFSPROXY is an HTTPS proxy server that exposes the same HSFTP interface as a
+real cluster. It authenticates users via user certificates and enforces access
+control based on configuration files.
+
+Starting up an HDFSPROXY server is similar to starting up an HDFS cluster.
+Simply run the "hdfsproxy" shell command. The main configuration file is
+hdfsproxy-default.xml, which should be on the classpath. hdfsproxy-env.sh
+can be used to set up environment variables. In particular, JAVA_HOME should
+be set. Additional configuration files include user-certs.xml,
+user-permissions.xml and ssl-server.xml, which are used to specify allowed user
+certs, allowed directories/files, and ssl keystore information for the proxy,
+respectively. The location of these files can be specified in
+hdfsproxy-default.xml. The environment variable HDFSPROXY_CONF_DIR can be used
+to point to the directory where these configuration files are located. The
+configuration files of the proxied HDFS cluster should also be available on the
+classpath (hadoop-default.xml and hadoop-site.xml).
+
+Mirroring those used in HDFS, a few shell scripts are provided to start and
+stop a group of proxy servers. The hosts to run hdfsproxy on are specified in
+the hdfsproxy-hosts file, one host per line. All hdfsproxy servers are stateless
+and run independently of each other. Simple load balancing can be set up by
+mapping all hdfsproxy server IP addresses to a single hostname. Users should
+use that hostname to access the proxy. If an IP address lookup for that
+hostname returns more than one IP address, an HFTP/HSFTP client will randomly
+pick one to use.
+
+The command "hdfsproxy -reloadPermFiles" can be used to trigger reloading of
+the user-certs.xml and user-permissions.xml files on all proxy servers listed in
+the hdfsproxy-hosts file. Similarly, the command "hdfsproxy -clearUgiCache" can
+be used to clear the UGI caches on all proxy servers.
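
For reference, a typical operator session with the scripts and commands described above might look like the following sketch; the install path is hypothetical and the default conf/ layout is assumed.

# Sketch of a typical session (the install path is illustrative only).
cd /usr/local/hdfsproxy

# Start a proxy daemon on every host listed in conf/hdfsproxy-hosts.
bin/start-hdfsproxy.sh

# After editing user-certs.xml or user-permissions.xml, reload them on
# all proxies without restarting the daemons.
bin/hdfsproxy -reloadPermFiles

# Drop the cached UGIs on all proxies.
bin/hdfsproxy -clearUgiCache

# Stop all proxy daemons.
bin/stop-hdfsproxy.sh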

+ 151 - 0
src/contrib/hdfsproxy/bin/hdfsproxy

@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The HdfsProxy command script
+#
+# Environment Variables
+#
+#   JAVA_HOME        The java implementation to use.  Required.
+#
+#   HDFSPROXY_CLASSPATH Extra Java CLASSPATH entries.
+#
+#   HDFSPROXY_HEAPSIZE  The maximum amount of heap to use, in MB. 
+#                    Default is 1000.
+#
+#   HDFSPROXY_OPTS      Extra Java runtime options.
+#   
+#   HDFSPROXY_{COMMAND}_OPTS      Extra options added to HDFSPROXY_OPTS when
+#                                 the corresponding command is run.
+#
+#   HDFSPROXY_CONF_DIR  Alternate conf dir. Default is ${HDFSPROXY_HOME}/conf.
+#
+#   HDFSPROXY_ROOT_LOGGER The root appender. Default is INFO,console
+#
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hdfsproxy-config.sh
+
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+
+if [ -f "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh" ]; then
+  . "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# check envvars which might override default args
+if [ "$HDFSPROXY_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $HDFSPROXY_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$HDFSPROXY_HEAPSIZE""m"
+  #echo $JAVA_HEAP_MAX
+fi
+
+# CLASSPATH initially contains $HDFSPROXY_CONF_DIR
+CLASSPATH="${HDFSPROXY_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add HdfsProxy classes to CLASSPATH
+if [ -d "$HDFSPROXY_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME/build/classes
+fi
+if [ -d "$HDFSPROXY_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME/build
+fi
+if [ -d "$HDFSPROXY_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add hdfsproxy jar & webapps to CLASSPATH
+if [ -d "$HDFSPROXY_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HDFSPROXY_HOME
+fi
+for f in $HDFSPROXY_HOME/hdfsproxy-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HDFSPROXY_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add user-specified CLASSPATH last
+if [ "$HDFSPROXY_CLASSPATH" != "" ]; then
+  CLASSPATH=${CLASSPATH}:${HDFSPROXY_CLASSPATH}
+fi
+
+# default log directory & file
+if [ "$HDFSPROXY_LOG_DIR" = "" ]; then
+  HDFSPROXY_LOG_DIR="$HDFSPROXY_HOME/logs"
+fi
+if [ "$HDFSPROXY_LOGFILE" = "" ]; then
+  HDFSPROXY_LOGFILE='hdfsproxy.log'
+fi
+
+# restore ordinary behaviour
+unset IFS
+
+# figure out which class to run
+CLASS='org.apache.hadoop.hdfsproxy.HdfsProxy'
+
+# cygwin path translation
+if $cygwin; then
+  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+  HDFSPROXY_HOME=`cygpath -d "$HDFSPROXY_HOME"`
+  HDFSPROXY_LOG_DIR=`cygpath -d "$HDFSPROXY_LOG_DIR"`
+fi
+
+# cygwin path translation
+if $cygwin; then
+  JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
+fi
+
+HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.log.dir=$HDFSPROXY_LOG_DIR"
+HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.log.file=$HDFSPROXY_LOGFILE"
+HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.home.dir=$HDFSPROXY_HOME"
+HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.id.str=$HDFSPROXY_IDENT_STRING"
+HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Dhdfsproxy.root.logger=${HDFSPROXY_ROOT_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  HDFSPROXY_OPTS="$HDFSPROXY_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+
+# run it
+exec "$JAVA" $JAVA_HEAP_MAX $HDFSPROXY_OPTS -classpath "$CLASSPATH" $CLASS "$@"
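
The script above is driven mainly by environment variables. A minimal sketch of overriding the most common ones before an interactive (non-daemon) run, with illustrative paths:

# Hypothetical settings; only JAVA_HOME is required, the rest have defaults.
export JAVA_HOME=/usr/lib/jvm/java-6-sun      # script exits if this is unset
export HDFSPROXY_HEAPSIZE=2000                # becomes -Xmx2000m (default 1000)
export HDFSPROXY_CLASSPATH=/etc/hadoop/conf   # extra classpath entries, appended last
export HDFSPROXY_ROOT_LOGGER=DEBUG,console    # default is INFO,console

bin/hdfsproxy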

+ 67 - 0
src/contrib/hdfsproxy/bin/hdfsproxy-config.sh

@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# included in all the hadoop scripts with source command
+# should not be executable directly
+# also should not be passed any arguments, since we need original $*
+
+# resolve links - $0 may be a softlink
+
+this="$0"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+bin=`cd "$bin"; pwd`
+this="$bin/$script"
+
+# the root of the HdfsProxy installation
+export HDFSPROXY_HOME=`dirname "$this"`/..
+
+#check to see if the conf dir is given as an optional argument
+if [ $# -gt 1 ]
+then
+    if [ "--config" = "$1" ]
+	  then
+	      shift
+	      confdir=$1
+	      shift
+	      HDFSPROXY_CONF_DIR=$confdir
+    fi
+fi
+ 
+# Allow alternate conf dir location.
+HDFSPROXY_CONF_DIR="${HDFSPROXY_CONF_DIR:-$HDFSPROXY_HOME/conf}"
+
+#check to see it is specified whether to use the slaves file
+if [ $# -gt 1 ]
+then
+    if [ "--hosts" = "$1" ]
+    then
+        shift
+        slavesfile=$1
+        shift
+        export HDFSPROXY_SLAVES="${HDFSPROXY_CONF_DIR}/$slavesfile"
+    fi
+fi
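
Because hdfsproxy-config.sh only consumes --config and --hosts when they appear first on the command line, the flags must precede the daemon command. For example (the conf directory is hypothetical):

# --config picks the conf dir; --hosts names a host list file inside that dir.
bin/hdfsproxy-daemons.sh --config /etc/hdfsproxy/conf --hosts hdfsproxy-hosts start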

+ 141 - 0
src/contrib/hdfsproxy/bin/hdfsproxy-daemon.sh

@@ -0,0 +1,141 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Runs a HdfsProxy as a daemon.
+#
+# Environment Variables
+#
+#   HDFSPROXY_CONF_DIR  Alternate conf dir. Default is ${HDFSPROXY_HOME}/conf.
+#   HDFSPROXY_LOG_DIR   Where log files are stored.  $HDFSPROXY_HOME/logs by default.
+#   HDFSPROXY_MASTER    host:path where hdfsproxy code should be rsync'd from
+#   HDFSPROXY_PID_DIR   Where the pid files are stored. /tmp by default.
+#   HDFSPROXY_IDENT_STRING   A string representing this instance of hdfsproxy. $USER by default
+#   HDFSPROXY_NICENESS The scheduling priority for daemons. Defaults to 0.
+##
+
+usage="Usage: hdfsproxy-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) "
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hdfsproxy-config.sh
+
+# get arguments
+startStop=$1
+shift
+
+hdfsproxy_rotate_log ()
+{
+    log=$1;
+    num=5;
+    if [ -n "$2" ]; then
+	num=$2
+    fi
+    if [ -f "$log" ]; then # rotate logs
+	while [ $num -gt 1 ]; do
+	    prev=`expr $num - 1`
+	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+	    num=$prev
+	done
+	mv "$log" "$log.$num";
+    fi
+}
+
+if [ -f "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh" ]; then
+  . "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh"
+fi
+
+# get log directory
+if [ "$HDFSPROXY_LOG_DIR" = "" ]; then
+  export HDFSPROXY_LOG_DIR="$HDFSPROXY_HOME/logs"
+fi
+mkdir -p "$HDFSPROXY_LOG_DIR"
+
+if [ "$HDFSPROXY_PID_DIR" = "" ]; then
+  HDFSPROXY_PID_DIR=/tmp
+fi
+
+if [ "$HDFSPROXY_IDENT_STRING" = "" ]; then
+  export HDFSPROXY_IDENT_STRING="$USER"
+fi
+
+# some variables
+export HDFSPROXY_LOGFILE=hdfsproxy-$HDFSPROXY_IDENT_STRING-$HOSTNAME.log
+export HDFSPROXY_ROOT_LOGGER="INFO,DRFA"
+log=$HDFSPROXY_LOG_DIR/hdfsproxy-$HDFSPROXY_IDENT_STRING-$HOSTNAME.out
+pid=$HDFSPROXY_PID_DIR/hdfsproxy-$HDFSPROXY_IDENT_STRING.pid
+
+# Set default scheduling priority
+if [ "$HDFSPROXY_NICENESS" = "" ]; then
+    export HDFSPROXY_NICENESS=0
+fi
+
+case $startStop in
+
+  (start)
+
+    mkdir -p "$HDFSPROXY_PID_DIR"
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo hdfsproxy running as process `cat $pid`.  Stop it first.
+        exit 1
+      fi
+    fi
+
+    if [ "$HDFSPROXY_MASTER" != "" ]; then
+      echo rsync from $HDFSPROXY_MASTER
+      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HDFSPROXY_MASTER/ "$HDFSPROXY_HOME"
+    fi
+
+    hdfsproxy_rotate_log $log
+    echo starting hdfsproxy, logging to $log
+    cd "$HDFSPROXY_HOME"
+    nohup nice -n $HDFSPROXY_NICENESS "$HDFSPROXY_HOME"/bin/hdfsproxy --config $HDFSPROXY_CONF_DIR "$@" > "$log" 2>&1 < /dev/null &
+    echo $! > $pid
+    sleep 1; head "$log"
+    ;;
+          
+  (stop)
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo stopping hdfsproxy
+        kill `cat $pid`
+      else
+        echo no hdfsproxy to stop
+      fi
+    else
+      echo no hdfsproxy to stop
+    fi
+    ;;
+
+  (*)
+    echo $usage
+    exit 1
+    ;;
+
+esac
+
+
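
A sketch of driving the daemon script directly on a single host, with illustrative directories; the variables correspond to the ones documented at the top of the script and are assumed not to be overridden by hdfsproxy-env.sh:

# Hypothetical single-host run with custom pid/log locations.
export HDFSPROXY_PID_DIR=/var/run/hdfsproxy
export HDFSPROXY_LOG_DIR=/var/log/hdfsproxy
export HDFSPROXY_IDENT_STRING=proxyops        # used in the .pid/.log/.out names

bin/hdfsproxy-daemon.sh --config /etc/hdfsproxy/conf start
# ...and later...
bin/hdfsproxy-daemon.sh --config /etc/hdfsproxy/conf stop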

+ 34 - 0
src/contrib/hdfsproxy/bin/hdfsproxy-daemons.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a HdfsProxy command on all slave hosts.
+
+usage="Usage: hdfsproxy-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] "
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. $bin/hdfsproxy-config.sh
+
+exec "$bin/hdfsproxy-slaves.sh" --config $HDFSPROXY_CONF_DIR cd "$HDFSPROXY_HOME" \; "$bin/hdfsproxy-daemon.sh" --config $HDFSPROXY_CONF_DIR "$@"

+ 68 - 0
src/contrib/hdfsproxy/bin/hdfsproxy-slaves.sh

@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a shell command on all slave hosts.
+#
+# Environment Variables
+#
+#   HDFSPROXY_SLAVES    File naming remote hosts.
+#     Default is ${HDFSPROXY_CONF_DIR}/hdfsproxy-hosts.
+#   HDFSPROXY_CONF_DIR  Alternate conf dir. Default is ${HDFSPROXY_HOME}/conf.
+#   HDFSPROXY_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   HDFSPROXY_SSH_OPTS Options passed to ssh when running remote commands.
+##
+
+usage="Usage: hdfsproxy-slaves.sh [--config confdir] command..."
+
+# if no args specified, show usage
+if [ $# -le 0 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hdfsproxy-config.sh
+
+# If the slaves file is specified in the command line,
+# then it takes precedence over the definition in 
+# hdfsproxy-env.sh. Save it here.
+HOSTLIST=$HDFSPROXY_SLAVES
+
+if [ -f "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh" ]; then
+  . "${HDFSPROXY_CONF_DIR}/hdfsproxy-env.sh"
+fi
+
+if [ "$HOSTLIST" = "" ]; then
+  if [ "$HDFSPROXY_SLAVES" = "" ]; then
+    export HOSTLIST="${HDFSPROXY_CONF_DIR}/hdfsproxy-hosts"
+  else
+    export HOSTLIST="${HDFSPROXY_SLAVES}"
+  fi
+fi
+
+for slave in `cat "$HOSTLIST"`; do
+ ssh $HDFSPROXY_SSH_OPTS $slave $"${@// /\\ }" \
+   2>&1 | sed "s/^/$slave: /" &
+ if [ "$HDFSPROXY_SLAVE_SLEEP" != "" ]; then
+   sleep $HDFSPROXY_SLAVE_SLEEP
+ fi
+done
+
+wait
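
The slaves script can also run ad-hoc commands on every proxy host. A small sketch using the optional ssh and sleep settings (the values mirror the commented examples in hdfsproxy-env.sh):

export HDFSPROXY_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HDFSPROXY_CONF_DIR"
export HDFSPROXY_SLAVE_SLEEP=0.1    # stagger the remote commands slightly

# Run an arbitrary command on every host in conf/hdfsproxy-hosts.
bin/hdfsproxy-slaves.sh uptime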

+ 37 - 0
src/contrib/hdfsproxy/bin/start-hdfsproxy.sh

@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Start hdfsproxy daemons.
+# Run this on master node.
+
+usage="Usage: start-hdfsproxy.sh"
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hdfsproxy-config.sh
+
+# get arguments
+if [ $# -ge 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+# start hdfsproxy daemons
+# "$bin"/hdfsproxy-daemon.sh --config $HDFSPROXY_CONF_DIR start
+"$bin"/hdfsproxy-daemons.sh --config $HDFSPROXY_CONF_DIR --hosts hdfsproxy-hosts start

+ 28 - 0
src/contrib/hdfsproxy/bin/stop-hdfsproxy.sh

@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop hdfsproxy daemons.  Run this on master node.
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hdfsproxy-config.sh
+
+# "$bin"/hdfsproxy-daemon.sh --config $HDFSPROXY_CONF_DIR stop
+"$bin"/hdfsproxy-daemons.sh --config $HDFSPROXY_CONF_DIR --hosts hdfsproxy-hosts stop
+

+ 163 - 0
src/contrib/hdfsproxy/build.xml

@@ -0,0 +1,163 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="hdfsproxy" default="jar">
+	<property name="hdfsproxyVersion" value="1.0"/>
+	<property name="final.name" value="${ant.project.name}-${hdfsproxyVersion}"/>
+	<property name="bin.dir" value="${basedir}/bin"/>
+	<property name="lib.dir" value="${basedir}/lib"/>
+	<property name="conf.dir" value="${basedir}/conf"/>
+	<property name="docs.dir" value="${basedir}/docs"/>
+	<import file="../build-contrib.xml"/>
+
+	<target name="jar" depends="compile" description="Create jar">
+		<echo>
+            Building the .jar files.
+        </echo>
+		<jar jarfile="${build.dir}/${final.name}.jar" basedir="${build.classes}" includes="org/apache/hadoop/hdfsproxy/**/*.class" >
+                        <manifest>
+                            <section name="org/apache/hadoop/hdfsproxy">
+                                <attribute name="Implementation-Title" value="HdfsProxy"/>
+                                <attribute name="Implementation-Version" value="${hdfsproxyVersion}"/>
+                                <attribute name="Implementation-Vendor" value="Apache"/>
+                            </section>
+                        </manifest>
+
+			<fileset dir="${basedir}/src/java">
+				<include name="org/apache/hadoop/hdfsproxy/**/*.java"/>
+			</fileset>
+		</jar>
+	</target>
+
+	<!-- ====================================================== -->
+	<!-- Macro definitions                                      -->
+	<!-- ====================================================== -->
+	<macrodef name="macro_tar" description="Worker Macro for tar">
+		<attribute name="param.destfile"/>
+		<element name="param.listofitems"/>
+		<sequential>
+			<tar compression="gzip" longfile="gnu"
+          destfile="@{param.destfile}">
+				<param.listofitems/>
+			</tar>
+		</sequential>
+	</macrodef>
+
+	<!-- ================================================================== -->
+	<!-- D I S T R I B U T I O N                                            -->
+	<!-- ================================================================== -->
+	<!--                                                                    -->
+	<!-- ================================================================== -->
+	<target name="package" depends="jar" description="Build distribution">
+		<mkdir dir="${build.dir}/${final.name}"/>
+		<mkdir dir="${build.dir}/${final.name}/lib"/>
+		<mkdir dir="${build.dir}/${final.name}/logs"/>
+
+		<copy todir="${build.dir}/${final.name}" includeEmptyDirs="false">
+			<fileset dir="${build.dir}">
+				<include name="**/*.jar" />
+				<include name="**/*.war" />
+			</fileset>
+		</copy>
+		<copy todir="${build.dir}/${final.name}/lib" includeEmptyDirs="false">
+			<fileset dir="${lib.dir}">
+				<exclude name="**/native/**"/>
+			</fileset>
+		</copy>
+
+		<copy todir="${build.dir}/${final.name}/lib" includeEmptyDirs="false">
+                  	<fileset dir="${hadoop.root}/build">
+                          	<include name="*-core.jar"/>
+                          	<include name="*-tools.jar"/>
+			</fileset>
+		</copy>
+
+		<copy todir="${build.dir}/${final.name}/bin">
+			<fileset dir="${bin.dir}"/>
+		</copy>
+
+		<copy todir="${build.dir}/${final.name}/conf">
+			<fileset dir="${conf.dir}"/>
+		</copy>
+
+		<copy todir="${build.dir}/${final.name}">
+			<fileset dir="${basedir}">
+				<include name="README" />
+				<include name="build.xml" />
+				<include name="*.txt" />
+			</fileset>
+		</copy>
+
+		<copy todir="${build.dir}/${final.name}/src" includeEmptyDirs="true">
+			<fileset dir="${src.dir}" excludes="**/*.template **/docs/build/**/*"/>
+		</copy>
+
+		<chmod perm="ugo+x" type="file" parallel="false">
+			<fileset dir="${build.dir}/${final.name}/bin"/>
+		</chmod>
+
+	</target>
+
+	<!-- ================================================================== -->
+	<!-- Make release tarball                                               -->
+	<!-- ================================================================== -->
+	<target name="tar" depends="package" description="Make release tarball">
+		<macro_tar param.destfile="${build.dir}/${final.name}.tar.gz">
+			<param.listofitems>
+				<tarfileset dir="${build.dir}" mode="664">
+					<exclude name="${final.name}/bin/*" />
+					<include name="${final.name}/**" />
+				</tarfileset>
+				<tarfileset dir="${build.dir}" mode="755">
+					<include name="${final.name}/bin/*" />
+				</tarfileset>
+			</param.listofitems>
+		</macro_tar>
+	</target>
+
+	<target name="binary" depends="package" description="Make tarball without source and documentation">
+		<macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
+			<param.listofitems>
+				<tarfileset dir="${build.dir}" mode="664">
+					<exclude name="${final.name}/src/**" />
+					<exclude name="${final.name}/docs/**" />
+					<include name="${final.name}/**" />
+				</tarfileset>
+				<tarfileset dir="${build.dir}" mode="755">
+					<include name="${final.name}/bin/*" />
+				</tarfileset>
+			</param.listofitems>
+		</macro_tar>
+	</target>
+
+  <!-- the unit test classpath -->
+  <path id="test.classpath">
+    <pathelement location="${build.test}" />
+    <pathelement location="${hadoop.root}/build/test/classes"/>
+    <pathelement location="${hadoop.root}/src/contrib/test"/>
+    <pathelement location="${hadoop.root}/conf"/>
+    <pathelement location="${hadoop.root}/build"/>
+    <pathelement location="${hadoop.root}/build/classes"/>
+    <pathelement location="${hadoop.root}/build/tools"/>
+    <pathelement location="${build.examples}"/>
+    <path refid="classpath"/>
+  </path>
+
+
+</project>
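
A hedged sketch of driving this build file from within a Hadoop source tree; it assumes the core jars have already been built at the top level so that ${hadoop.root}/build is populated, and that the compile target comes from the imported build-contrib.xml:

cd src/contrib/hdfsproxy
ant jar          # build/hdfsproxy-1.0.jar
ant package      # assemble build/hdfsproxy-1.0/ with bin, conf, lib, src
ant tar          # build/hdfsproxy-1.0.tar.gz
ant binary       # tarball without source and documentation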

+ 24 - 0
src/contrib/hdfsproxy/conf/configuration.xsl

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>

+ 59 - 0
src/contrib/hdfsproxy/conf/hdfsproxy-default.xml

@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put hdfsproxy specific properties in this file. -->
+
+<configuration>
+
+<property>
+  <name>hdfsproxy.https.address</name>
+  <value>0.0.0.0:50479</value>
+  <description>the SSL port that hdfsproxy listens on
+  </description>
+</property>
+
+<property>
+  <name>hdfsproxy.hosts</name>
+  <value>hdfsproxy-hosts</value>
+  <description>location of hdfsproxy-hosts file
+  </description>
+</property>
+
+<property>
+  <name>hdfsproxy.dfs.namenode.address</name>
+  <value></value>
+  <description>namenode address of the HDFS cluster being proxied
+  </description>
+</property>
+
+<property>
+  <name>hdfsproxy.https.server.keystore.resource</name>
+  <value>ssl-server.xml</value>
+  <description>location of the resource from which ssl server keystore
+  information will be extracted
+  </description>
+</property>
+
+<property>
+  <name>hdfsproxy.user.permissions.file.location</name>
+  <value>user-permissions.xml</value>
+  <description>location of the user permissions file
+  </description>
+</property>
+
+<property>
+  <name>hdfsproxy.user.certs.file.location</name>
+  <value>user-certs.xml</value>
+  <description>location of the user certs file
+  </description>
+</property>
+
+<property>
+  <name>hdfsproxy.ugi.cache.ugi.lifetime</name>
+  <value>15</value>
+  <description> The lifetime (in minutes) of a cached ugi
+  </description>
+</property>
+
+</configuration>
+

+ 44 - 0
src/contrib/hdfsproxy/conf/hdfsproxy-env.sh

@@ -0,0 +1,44 @@
+# Set HdfsProxy-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HDFSPROXY_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HDFSPROXY_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# export HDFSPROXY_OPTS=
+
+# Extra ssh options.  Empty by default.
+# export HDFSPROXY_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HDFSPROXY_CONF_DIR"
+
+# Where log files are stored.  $HDFSPROXY_HOME/logs by default.
+# export HDFSPROXY_LOG_DIR=${HDFSPROXY_HOME}/logs
+
+# File naming remote slave hosts.  $HDFSPROXY_HOME/conf/slaves by default.
+# export HDFSPROXY_SLAVES=${HDFSPROXY_HOME}/conf/slaves
+
+# host:path where hdfsproxy code should be rsync'd from.  Unset by default.
+# export HDFSPROXY_MASTER=master:/home/$USER/src/hdfsproxy
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HDFSPROXY_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HDFSPROXY_PID_DIR=/var/hdfsproxy/pids
+
+# A string representing this instance of hdfsproxy. $USER by default.
+# export HDFSPROXY_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HDFSPROXY_NICENESS=10

+ 44 - 0
src/contrib/hdfsproxy/conf/hdfsproxy-env.sh.template

@@ -0,0 +1,44 @@
+# Set HdfsProxy-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HDFSPROXY_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HDFSPROXY_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# export HDFSPROXY_OPTS=
+
+# Extra ssh options.  Empty by default.
+# export HDFSPROXY_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HDFSPROXY_CONF_DIR"
+
+# Where log files are stored.  $HDFSPROXY_HOME/logs by default.
+# export HDFSPROXY_LOG_DIR=${HDFSPROXY_HOME}/logs
+
+# File naming remote slave hosts.  $HDFSPROXY_HOME/conf/slaves by default.
+# export HDFSPROXY_SLAVES=${HDFSPROXY_HOME}/conf/slaves
+
+# host:path where hdfsproxy code should be rsync'd from.  Unset by default.
+# export HDFSPROXY_MASTER=master:/home/$USER/src/hdfsproxy
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HDFSPROXY_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HDFSPROXY_PID_DIR=/var/hdfsproxy/pids
+
+# A string representing this instance of hdfsproxy. $USER by default.
+# export HDFSPROXY_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HDFSPROXY_NICENESS=10

+ 1 - 0
src/contrib/hdfsproxy/conf/hdfsproxy-hosts

@@ -0,0 +1 @@
+localhost

+ 61 - 0
src/contrib/hdfsproxy/conf/log4j.properties

@@ -0,0 +1,61 @@
+# Define some default values that can be overridden by system properties
+hdfsproxy.root.logger=INFO,console
+hdfsproxy.log.dir=.
+hdfsproxy.log.file=hdfsproxy.log
+
+# Define the root logger to the system property "hdfsproxy.root.logger".
+log4j.rootLogger=${hdfsproxy.root.logger}
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hdfsproxy.log.dir}/${hdfsproxy.log.file}
+
+# Rollover at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hdfsproxy.log.dir}/${hdfsproxy.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.hdfsproxy.HttpsProxy=DEBUG
+#log4j.logger.org.apache.hadoop.hdfsproxy.ProxyFilter=DEBUG
+
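
The daemon scripts export HDFSPROXY_ROOT_LOGGER=INFO,DRFA so background daemons log to the daily rolling file, while bin/hdfsproxy falls back to INFO,console. A sketch of overriding this for a foreground run (the log directory is illustrative):

# Verbose console logging for a foreground run.
HDFSPROXY_ROOT_LOGGER=DEBUG,console bin/hdfsproxy

# Or send a foreground run's output to the rolling file in a custom directory.
HDFSPROXY_LOG_DIR=/tmp/hdfsproxy-logs HDFSPROXY_ROOT_LOGGER=INFO,DRFA bin/hdfsproxy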

+ 26 - 0
src/contrib/hdfsproxy/conf/user-certs.xml

@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- 
+
+This file defines the mapping from each username to the comma-separated list
+of certificate serial numbers that the user is allowed to use. One mapping
+per user. Wildcard characters, such as "*" and "?", are not recognized.
+Any leading or trailing whitespace is stripped/ignored. Note that the user
+"Admin" is the special hdfsproxy admin user. To make a user an admin, add
+the user's certificate serial number to user "Admin". Normal users cannot
+have "Admin" as their username. Usernames may only contain 0-9a-zA-Z and
+underscores.
+
+-->
+
+<configuration>
+
+<property>
+  <name>Admin</name>
+  <value></value>
+  <description> Special hdfsproxy admin user
+  </description>
+</property>
+
+</configuration>
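
A hypothetical populated version of this file, written from the shell for illustration; the usernames and certificate serial numbers are made up, and the format follows the comment above (one property per user, with the value holding the comma-separated list of allowed serial numbers):

cat > conf/user-certs.xml <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>Admin</name>
    <value>20080001</value>
    <description>Certificate serial numbers of the hdfsproxy admins</description>
  </property>
  <property>
    <name>alice</name>
    <value>20080012,20080013</value>
    <description>Certificate serial numbers alice may present</description>
  </property>
</configuration>
EOF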

+ 28 - 0
src/contrib/hdfsproxy/conf/user-permissions.xml

@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- 
+
+This file defines the mapping from each username to the comma-separated list
+of directories/files that the user is allowed to access. One mapping
+per user. Wildcard characters, such as "*" and "?", are not recognized.
+For example, to match the "/output" directory, one can use "/output" or
+"/output/", but not "/output/*". Any leading or trailing whitespace
+in the name field is stripped/ignored, while only leading whitespace
+in the value field is. Note that the special hdfsproxy admin user "Admin"
+does not automatically have access to any files, unless explicitly
+specified in this file. Usernames may only contain 0-9a-zA-Z and
+underscores.
+
+-->
+
+<configuration>
+
+<property>
+  <name></name>
+  <value></value>
+  <description>
+  </description>
+</property>
+
+</configuration>
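
Similarly, a hypothetical user-permissions.xml granting the made-up user "alice" access to two directories, followed by the reload command from the README; the paths and names are illustrative:

cat > conf/user-permissions.xml <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>alice</name>
    <value>/user/alice,/data/shared</value>
    <description>Directories alice may access through the proxy</description>
  </property>
</configuration>
EOF

# Push the updated files to every proxy listed in hdfsproxy-hosts.
bin/hdfsproxy -reloadPermFiles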

BIN
src/contrib/hdfsproxy/lib/commons-el.jar


BIN
src/contrib/hdfsproxy/lib/commons-logging-1.0.4.jar


BIN
src/contrib/hdfsproxy/lib/commons-logging-api-1.0.4.jar


BIN
src/contrib/hdfsproxy/lib/jasper-compiler.jar


BIN
src/contrib/hdfsproxy/lib/jasper-runtime.jar


+ 202 - 0
src/contrib/hdfsproxy/lib/jetty-5.1.4.LICENSE.txt

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

BIN
src/contrib/hdfsproxy/lib/jetty-5.1.4.jar


BIN
src/contrib/hdfsproxy/lib/jsp-api.jar


+ 100 - 0
src/contrib/hdfsproxy/lib/junit-3.8.1.LICENSE.txt

@@ -0,0 +1,100 @@
+Common Public License Version 1.0
+
+THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS COMMON PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
+
+1. DEFINITIONS
+
+"Contribution" means:
+
+    a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and
+
+    b) in the case of each subsequent Contributor:
+
+    i) changes to the Program, and
+
+    ii) additions to the Program;
+
+    where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program.
+
+"Contributor" means any person or entity that distributes the Program.
+
+"Licensed Patents " mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.
+
+"Program" means the Contributions distributed in accordance with this Agreement.
+
+"Recipient" means anyone who receives the Program under this Agreement, including all Contributors.
+
+2. GRANT OF RIGHTS
+
+    a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.
+
+    b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder.
+
+    c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.
+
+    d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement.
+
+3. REQUIREMENTS
+
+A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:
+
+    a) it complies with the terms and conditions of this Agreement; and
+
+    b) its license agreement:
+
+    i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose;
+
+    ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits;
+
+    iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and
+
+    iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange. 
+
+When the Program is made available in source code form:
+
+    a) it must be made available under this Agreement; and
+
+    b) a copy of this Agreement must be included with each copy of the Program. 
+
+Contributors may not remove or alter any copyright notices contained within the Program.
+
+Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.
+
+4. COMMERCIAL DISTRIBUTION
+
+Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.
+
+For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.
+
+5. NO WARRANTY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.
+
+6. DISCLAIMER OF LIABILITY
+
+EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+7. GENERAL
+
+If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
+
+If Recipient institutes patent litigation against a Contributor with respect to a patent applicable to software (including a cross-claim or counterclaim in a lawsuit), then any patent licenses granted by that Contributor to such Recipient under this Agreement shall terminate as of the date such litigation is filed. In addition, if Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.
+
+All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.
+
+Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. IBM is the initial Agreement Steward. IBM may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.
+
+This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.

BIN
src/contrib/hdfsproxy/lib/junit-3.8.1.jar


BIN
src/contrib/hdfsproxy/lib/log4j-1.2.15.jar


BIN
src/contrib/hdfsproxy/lib/servlet-api.jar


+ 24 - 0
src/contrib/hdfsproxy/lib/slf4j-LICENSE.txt

@@ -0,0 +1,24 @@
+Copyright (c) 2004-2008 QOS.ch
+All rights reserved.
+
+Permission is hereby granted, free  of charge, to any person obtaining
+a  copy  of this  software  and  associated  documentation files  (the
+"Software"), to  deal in  the Software without  restriction, including
+without limitation  the rights to  use, copy, modify,  merge, publish,
+distribute,  sublicense, and/or sell  copies of  the Software,  and to
+permit persons to whom the Software  is furnished to do so, subject to
+the following conditions:
+
+The  above  copyright  notice  and  this permission  notice  shall  be
+included in all copies or substantial portions of the Software.
+
+THE  SOFTWARE IS  PROVIDED  "AS  IS", WITHOUT  WARRANTY  OF ANY  KIND,
+EXPRESS OR  IMPLIED, INCLUDING  BUT NOT LIMITED  TO THE  WARRANTIES OF
+MERCHANTABILITY,    FITNESS    FOR    A   PARTICULAR    PURPOSE    AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE,  ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+

BIN
src/contrib/hdfsproxy/lib/slf4j-api-1.4.3.jar


BIN
src/contrib/hdfsproxy/lib/slf4j-log4j12-1.4.3.jar


BIN
src/contrib/hdfsproxy/lib/xmlenc-0.52.jar


+ 293 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/HdfsProxy.java

@@ -0,0 +1,293 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.Set;
+
+import javax.net.ssl.HttpsURLConnection;
+import javax.net.ssl.HostnameVerifier;
+import javax.net.ssl.SSLSession;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.util.HostsFileReader;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * A HTTPS/SSL proxy to HDFS, implementing certificate based access control.
+ */
+public class HdfsProxy {
+  public static final Log LOG = LogFactory.getLog(HdfsProxy.class);
+
+  private ProxyHttpServer server;
+  private InetSocketAddress sslAddr;
+  
+  /** Construct a proxy from the given configuration */
+  public HdfsProxy(Configuration conf) throws IOException {
+    try {
+      initialize(conf);
+    } catch (IOException e) {
+      this.stop();
+      throw e;
+    }
+  }
+  
+  private void initialize(Configuration conf) throws IOException {
+    sslAddr = getSslAddr(conf);
+    String nn = conf.get("hdfsproxy.dfs.namenode.address");
+    if (nn == null)
+      throw new IOException("HDFS NameNode address is not specified");
+    InetSocketAddress nnAddr = NetUtils.createSocketAddr(nn);
+    LOG.info("HDFS NameNode is at: " + nnAddr.getHostName() + ":" + nnAddr.getPort());
+
+    this.server = new ProxyHttpServer();
+    this.server.setAttribute("proxy.https.port", sslAddr.getPort());
+    this.server.setAttribute("name.node.address", nnAddr);
+    this.server.setAttribute("name.conf", new Configuration());
+    this.server.addGlobalFilter("ProxyFilter", ProxyFilter.class.getName(), null);
+    this.server.addServlet("listPaths", "/listPaths/*", ProxyListPathsServlet.class);
+    this.server.addServlet("data", "/data/*", ProxyFileDataServlet.class);
+    this.server.addServlet("streamFile", "/streamFile/*", ProxyStreamFile.class);
+  }
+  
+  /** add an SSL listener */
+  private void addSslListener(Configuration conf) throws IOException {
+    Configuration sslConf = new Configuration(false);
+    sslConf.addResource(conf.get("hdfsproxy.https.server.keystore.resource",
+        "ssl-server.xml"));
+    server.addSslListener(sslAddr, sslConf);
+  }
+  
+  /** add an http listener, only for testing purposes */
+  void addListener(InetSocketAddress addr, boolean findPort)
+      throws IOException {
+    this.server.addListener(addr, findPort);
+    LOG.warn("An HTTP listener is attached to the proxy server. " +
+    		"It should only be used for testing purposes.");
+  }
+  
+  /** return the http port if any, only for testing purposes */
+  int getPort() throws IOException {
+    return server.getPort();
+  }
+  
+  /**
+   * Start the server.
+   */
+  public void start() throws IOException {
+    this.server.start();
+    LOG.info("HdfsProxy server up at: " + sslAddr.getHostName() + ":"
+        + sslAddr.getPort());
+  }
+  
+  /**
+   * Stop all server threads and wait for all to finish.
+   */
+  public void stop() {
+    try {
+      if (server != null) {
+        server.stop();
+        server.join();
+      }
+    } catch (InterruptedException ie) {
+    }
+  }
+  
+  /**
+   * Wait for service to finish.
+   * (Normally, it runs forever.)
+   */
+  public void join() {
+    try {
+      this.server.join();
+    } catch (InterruptedException ie) {
+    }
+  }
+  
+  private static enum StartupOption {
+    RELOAD("-reloadPermFiles"), CLEAR("-clearUgiCache"), REGULAR("-regular");
+
+    private String name = null;
+
+    private StartupOption(String arg) {
+      this.name = arg;
+    }
+
+    public String getName() {
+      return name;
+    }
+  }
+
+  private static void printUsage() {
+    System.err.println("Usage: hdfsproxy ["
+        + StartupOption.RELOAD.getName() + "] | ["
+        + StartupOption.CLEAR.getName() + "]");
+  }
+
+  private static StartupOption parseArguments(String args[]) {
+    int argsLen = (args == null) ? 0 : args.length;
+    StartupOption startOpt = StartupOption.REGULAR;
+    for (int i = 0; i < argsLen; i++) {
+      String cmd = args[i];
+      if (StartupOption.RELOAD.getName().equalsIgnoreCase(cmd)) {
+        startOpt = StartupOption.RELOAD;
+      } else if (StartupOption.CLEAR.getName().equalsIgnoreCase(cmd)) {
+        startOpt = StartupOption.CLEAR;
+      } else if (StartupOption.REGULAR.getName().equalsIgnoreCase(cmd)) {
+        startOpt = StartupOption.REGULAR;
+      } else
+        return null;
+    }
+    return startOpt;
+  }
+
+  /**
+   * Dummy hostname verifier that is used to bypass hostname checking
+   */
+  private static class DummyHostnameVerifier implements HostnameVerifier {
+    public boolean verify(String hostname, SSLSession session) {
+      return true;
+    }
+  }
+
+  private static HttpsURLConnection openConnection(String hostname, int port,
+      String path) throws IOException {
+    try {
+      final URL url = new URI("https", null, hostname, port, path, null, null)
+          .toURL();
+      HttpsURLConnection conn = (HttpsURLConnection) url.openConnection();
+      // bypass hostname verification
+      conn.setHostnameVerifier(new DummyHostnameVerifier());
+      conn.setRequestMethod("GET");
+      return conn;
+    } catch (URISyntaxException e) {
+      throw (IOException) new IOException().initCause(e);
+    }
+  }
+
+  private static void setupSslProps(Configuration conf) {
+    Configuration sslConf = new Configuration(false);
+    sslConf.addResource(conf.get("hdfsproxy.https.server.keystore.resource",
+        "ssl-server.xml"));
+    System.setProperty("javax.net.ssl.trustStore", sslConf
+        .get("ssl.server.truststore.location"));
+    System.setProperty("javax.net.ssl.trustStorePassword", sslConf.get(
+        "ssl.server.truststore.password", ""));
+    System.setProperty("javax.net.ssl.trustStoreType", sslConf.get(
+        "ssl.server.truststore.type", "jks"));
+    System.setProperty("javax.net.ssl.keyStore", sslConf
+        .get("ssl.server.keystore.location"));
+    System.setProperty("javax.net.ssl.keyStorePassword", sslConf.get(
+        "ssl.server.keystore.password", ""));
+    System.setProperty("javax.net.ssl.keyPassword", sslConf.get(
+        "ssl.server.keystore.keypassword", ""));
+    System.setProperty("javax.net.ssl.keyStoreType", sslConf.get(
+        "ssl.server.keystore.type", "jks"));
+  }
+
+  private static InetSocketAddress getSslAddr(Configuration conf) throws IOException {
+    String addr = conf.get("hdfsproxy.https.address");
+    if (addr == null)
+      throw new IOException("HdfsProxy address is not specified");
+    return NetUtils.createSocketAddr(addr);
+  }
+
+  private static boolean sendCommand(Configuration conf, String path)
+      throws IOException {
+    setupSslProps(conf);
+    int sslPort = getSslAddr(conf).getPort();
+    int err = 0;
+    StringBuilder b = new StringBuilder();
+    HostsFileReader hostsReader = new HostsFileReader(conf.get("hdfsproxy.hosts",
+        "hdfsproxy-hosts"), "");
+    Set<String> hostsList = hostsReader.getHosts();
+    for (String hostname : hostsList) {
+      HttpsURLConnection connection = null;
+      try {
+        connection = openConnection(hostname, sslPort, path);
+        connection.connect();
+        if (connection.getResponseCode() != HttpServletResponse.SC_OK) {
+          b.append("\n\t" + hostname + ": " + connection.getResponseCode()
+              + " " + connection.getResponseMessage());
+          err++;
+        }
+      } catch (IOException e) {
+        b.append("\n\t" + hostname + ": " + e.getLocalizedMessage());
+        err++;
+      } finally {
+        if (connection != null)
+          connection.disconnect();
+      }
+    }
+    if (err > 0) {
+      System.err.print("Command failed on the following "
+          + err + " host" + (err==1?":":"s:") + b.toString() + "\n");
+      return true;
+    }
+    return false;
+  }
+
+  public static HdfsProxy createHdfsProxy(String argv[], Configuration conf)
+      throws IOException {
+    if (conf == null) {
+      conf = new Configuration(false);
+      conf.addResource("hdfsproxy-default.xml");
+    }
+    StartupOption startOpt = parseArguments(argv);
+    if (startOpt == null) {
+      printUsage();
+      return null;
+    }
+
+    switch (startOpt) {
+    case RELOAD:
+      boolean error = sendCommand(conf, "/reloadPermFiles");
+      System.exit(error ? 1 : 0);
+    case CLEAR:
+      error = sendCommand(conf, "/clearUgiCache");
+      System.exit(error ? 1 : 0);
+    default:
+    }
+
+    StringUtils.startupShutdownMessage(HdfsProxy.class, argv, LOG);
+    HdfsProxy proxy = new HdfsProxy(conf);
+    proxy.addSslListener(conf);
+    proxy.start();
+    return proxy;
+  }
+
+  public static void main(String[] argv) throws Exception {
+    try {
+      HdfsProxy proxy = createHdfsProxy(argv, null);
+      if (proxy != null)
+        proxy.join();
+    } catch (Throwable e) {
+      LOG.error(StringUtils.stringifyException(e));
+      System.exit(-1);
+    }
+  }
+}
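
A minimal sketch of driving this class programmatically, assuming hdfsproxy-default.xml and an ssl-server.xml keystore resource are on the classpath; the address values below are placeholders, not part of the commit:

    // Illustrative only: start the proxy with an explicit Configuration
    // instead of relying solely on hdfsproxy-default.xml.
    Configuration conf = new Configuration(false);
    conf.addResource("hdfsproxy-default.xml");
    conf.set("hdfsproxy.https.address", "0.0.0.0:50470");         // placeholder address
    conf.set("hdfsproxy.dfs.namenode.address", "namenode:8020");  // placeholder address
    HdfsProxy proxy = HdfsProxy.createHdfsProxy(new String[0], conf);
    if (proxy != null) {
      proxy.join();  // createHdfsProxy adds the SSL listener and starts the server
    }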

+ 51 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyFileDataServlet.java

@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
+import org.apache.hadoop.hdfs.server.namenode.FileDataServlet;
+import org.apache.hadoop.security.UnixUserGroupInformation;
+
+/** {@inheritDoc} */
+public class ProxyFileDataServlet extends FileDataServlet {
+  /** For java.io.Serializable */
+  private static final long serialVersionUID = 1L;
+
+  /** {@inheritDoc} */
+  @Override
+  protected URI createUri(FileStatus i, UnixUserGroupInformation ugi,
+      ClientProtocol nnproxy, HttpServletRequest request) throws IOException,
+      URISyntaxException {
+    return new URI(request.getScheme(), null, request.getServerName(), request
+        .getServerPort(), "/streamFile", "filename=" + i.getPath() + "&ugi="
+        + ugi, null);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected UnixUserGroupInformation getUGI(HttpServletRequest request) {
+    return (UnixUserGroupInformation) request.getAttribute("authorized.ugi");
+  }
+}
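
For illustration (host, port, file name, and ugi string are hypothetical), a request that reaches the proxy at https://proxyhost:50470 for /user/alice/part-0 is redirected back through the proxy's own /streamFile servlet rather than directly to a datanode; createUri above assembles roughly:

    // Hypothetical example of the URI built by createUri; the ugi value comes
    // from the "authorized.ugi" request attribute set by ProxyFilter.
    URI redirect = new URI("https", null, "proxyhost", 50470,
        "/streamFile", "filename=/user/alice/part-0&ugi=alice,users", null);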

+ 330 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyFilter.java

@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.security.cert.X509Certificate;
+import java.security.cert.CertificateExpiredException;
+import java.security.cert.CertificateNotYetValidException;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UnixUserGroupInformation;
+
+public class ProxyFilter implements Filter {
+  public static final Log LOG = LogFactory.getLog(ProxyFilter.class);
+
+  /** Pattern for triggering reload of user permissions */
+  protected static final Pattern RELOAD_PATTERN = Pattern
+      .compile("^(/reloadPermFiles)$");
+  /** Pattern for triggering clearing of ugi Cache */
+  protected static final Pattern CLEAR_PATTERN = Pattern
+      .compile("^(/clearUgiCache)$");
+  /** Pattern for a filter to find out if a request is HFTP/HSFTP request */
+  protected static final Pattern HFTP_PATTERN = Pattern
+      .compile("^(/listPaths|/data|/streamFile)$");
+  /**
+   * Pattern for a filter to find out if an HFTP/HSFTP request stores its file
+   * path in the extra path information associated with the URL; if not, the
+   * file path is stored in request parameter "filename"
+   */
+  protected static final Pattern FILEPATH_PATTERN = Pattern
+      .compile("^(/listPaths|/data)$");
+
+  private static volatile Map<String, Set<Path>> permsMap;
+  private static volatile Map<String, Set<BigInteger>> certsMap;
+  static {
+    Configuration conf = new Configuration(false);
+    conf.addResource("hdfsproxy-default.xml");
+    Map<String, Set<Path>> pMap = getPermMap(conf);
+    permsMap = pMap != null ? pMap : new HashMap<String, Set<Path>>();
+    Map<String, Set<BigInteger>> cMap = getCertsMap(conf);
+    certsMap = cMap != null ? cMap : new HashMap<String, Set<BigInteger>>();
+  }
+
+  /** {@inheritDoc} */
+  public void init(FilterConfig filterConfig) throws ServletException {
+  }
+
+  private static Map<String, Set<Path>> getPermMap(Configuration conf) {
+    String permLoc = conf.get("hdfsproxy.user.permissions.file.location",
+        "user-permissions.xml");
+    if (conf.getResource(permLoc) == null) {
+      LOG.warn("HdfsProxy user permissions file not found");
+      return null;
+    }
+    Configuration permConf = new Configuration(false);
+    permConf.addResource(permLoc);
+    Map<String, Set<Path>> map = new HashMap<String, Set<Path>>();
+    for (Map.Entry<String, String> e : permConf) {
+      String k = e.getKey();
+      String v = e.getValue();
+      if (k != null && k.length() != 0 && v != null && v.length() != 0) {
+        Set<Path> pathSet = new HashSet<Path>();
+        String[] paths = v.split(",\\s*");
+        for (String p : paths) {
+          if (p.length() != 0) {
+            pathSet.add(new Path(p));
+          }
+        }
+        map.put(k, pathSet);
+      }
+    }
+    return map;
+  }
+
+  private static Map<String, Set<BigInteger>> getCertsMap(Configuration conf) {
+    String certsLoc = conf.get("hdfsproxy.user.certs.file.location",
+        "user-certs.xml");
+    if (conf.getResource(certsLoc) == null) {
+      LOG.warn("HdfsProxy user certs file not found");
+      return null;
+    }
+    Configuration certsConf = new Configuration(false);
+    certsConf.addResource(certsLoc);
+    Map<String, Set<BigInteger>> map = new HashMap<String, Set<BigInteger>>();
+    for (Map.Entry<String, String> e : certsConf) {
+      String k = e.getKey();
+      String v = e.getValue().trim();
+      if (k != null && k.length() != 0 && v != null && v.length() != 0) {
+        Set<BigInteger> numSet = new HashSet<BigInteger>();
+        String[] serialnumbers = v.split("\\s*,\\s*");
+        for (String num : serialnumbers) {
+          if (num.length() != 0) {
+            numSet.add(new BigInteger(num, 16));
+          }
+        }
+        map.put(k, numSet);
+      }
+    }
+    return map;
+  }
+
+  /** {@inheritDoc} */
+  public void destroy() {
+  }
+
+  /** {@inheritDoc} */
+  public void doFilter(ServletRequest request, ServletResponse response,
+      FilterChain chain) throws IOException, ServletException {
+
+    HttpServletRequest rqst = (HttpServletRequest) request;
+    HttpServletResponse rsp = (HttpServletResponse) response;
+
+    if (LOG.isDebugEnabled()) {
+      StringBuilder b = new StringBuilder("Request from ").append(
+          rqst.getRemoteHost()).append("/").append(rqst.getRemoteAddr())
+          .append(":").append(rqst.getRemotePort());
+
+      @SuppressWarnings("unchecked")
+      Enumeration<String> e = rqst.getAttributeNames();
+      for (; e.hasMoreElements();) {
+        String attribute = e.nextElement();
+        b.append("\n  " + attribute + " => " + rqst.getAttribute(attribute));
+      }
+
+      X509Certificate[] userCerts = (X509Certificate[]) rqst
+          .getAttribute("javax.servlet.request.X509Certificate");
+      if (userCerts != null)
+        for (X509Certificate cert : userCerts)
+          b.append("\n Client certificate Subject Name is "
+              + cert.getSubjectX500Principal().getName());
+
+      b.append("\n The Scheme is " + rqst.getScheme());
+      b.append("\n The Auth Type is " + rqst.getAuthType());
+      b.append("\n The Path Info is " + rqst.getPathInfo());
+      b.append("\n The Translated Path Info is " + rqst.getPathTranslated());
+      b.append("\n The Context Path is " + rqst.getContextPath());
+      b.append("\n The Query String is " + rqst.getQueryString());
+      b.append("\n The Remote User is " + rqst.getRemoteUser());
+      b.append("\n The User Principal is " + rqst.getUserPrincipal());
+      b.append("\n The Request URI is " + rqst.getRequestURI());
+      b.append("\n The Request URL is " + rqst.getRequestURL());
+      b.append("\n The Servlet Path is " + rqst.getServletPath());
+
+      LOG.debug(b.toString());
+    }
+
+    if (rqst.getScheme().equalsIgnoreCase("https")) {
+      boolean isAuthorized = false;
+      X509Certificate[] certs = (X509Certificate[]) rqst
+          .getAttribute("javax.servlet.request.X509Certificate");
+      if (certs == null || certs.length == 0) {
+        rsp.sendError(HttpServletResponse.SC_BAD_REQUEST,
+            "No client SSL certificate received");
+        return;
+      }
+      for (X509Certificate cert : certs) {
+        try {
+          cert.checkValidity();
+        } catch (CertificateExpiredException e) {
+          LOG.info("Received cert for "
+              + cert.getSubjectX500Principal().getName() + " expired");
+          rsp
+              .sendError(HttpServletResponse.SC_FORBIDDEN,
+                  "Certificate expired");
+          return;
+        } catch (CertificateNotYetValidException e) {
+          LOG.info("Received cert for "
+              + cert.getSubjectX500Principal().getName() + " is not yet valid");
+          rsp.sendError(HttpServletResponse.SC_FORBIDDEN,
+              "Certificate is not yet valid");
+          return;
+        }
+      }
+
+      String[] tokens = certs[0].getSubjectX500Principal().getName().split(
+          "\\s*,\\s*");
+      String userID = null;
+      for (String s : tokens) {
+        if (s.startsWith("CN=")) {
+          userID = s;
+          break;
+        }
+      }
+      if (userID == null || userID.length() < 4) {
+        LOG.info("Can't retrieve user ID from SSL certificate");
+        rsp.sendError(HttpServletResponse.SC_FORBIDDEN,
+            "Can't retrieve user ID from SSL certificate");
+        return;
+      }
+      userID = userID.substring(3);
+
+      String servletPath = rqst.getServletPath();
+      if (HFTP_PATTERN.matcher(servletPath).matches()) {
+        // request is an HSFTP request
+        if (FILEPATH_PATTERN.matcher(servletPath).matches()) {
+          // file path as part of the URL
+          isAuthorized = checkPath(userID, certs[0],
+              rqst.getPathInfo() != null ? rqst.getPathInfo() : "/");
+        } else {
+          // file path is stored in "filename" parameter
+          isAuthorized = checkPath(userID, certs[0], rqst
+              .getParameter("filename"));
+        }
+      } else if (RELOAD_PATTERN.matcher(servletPath).matches()
+          && checkUser("Admin", certs[0])) {
+        Configuration conf = new Configuration(false);
+        conf.addResource("hdfsproxy-default.xml");
+        Map<String, Set<Path>> permsMap = getPermMap(conf);
+        Map<String, Set<BigInteger>> certsMap = getCertsMap(conf);
+        if (permsMap == null || certsMap == null) {
+          LOG.warn("Permission files reloading failed");
+          rsp.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
+              "Permission files reloading failed");
+          return;
+        }
+        ProxyFilter.permsMap = permsMap;
+        ProxyFilter.certsMap = certsMap;
+        LOG.info("User permissions and user certs files reloaded");
+        rsp.setStatus(HttpServletResponse.SC_OK);
+        return;
+      } else if (CLEAR_PATTERN.matcher(servletPath).matches()
+          && checkUser("Admin", certs[0])) {
+        ProxyUgiManager.clearCache();
+        LOG.info("Ugi cache cleared");
+        rsp.setStatus(HttpServletResponse.SC_OK);
+        return;
+      }
+
+      if (!isAuthorized) {
+        rsp.sendError(HttpServletResponse.SC_FORBIDDEN, "Unauthorized access");
+        return;
+      }
+      // request is authorized, set ugi for servlets
+      UnixUserGroupInformation ugi = ProxyUgiManager
+          .getUgiForUser(userID);
+      if (ugi == null) {
+        LOG.info("Can't retrieve ugi for user " + userID);
+        rsp.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
+            "Can't retrieve ugi for user " + userID);
+        return;
+      }
+      rqst.setAttribute("authorized.ugi", ugi);
+    } else { // http request, set ugi for servlets, only for testing purposes
+      String ugi = rqst.getParameter("ugi");
+      rqst.setAttribute("authorized.ugi", new UnixUserGroupInformation(ugi
+          .split(",")));
+    }
+
+    chain.doFilter(request, response);
+  }
+
+  /** check that client's cert is listed in the user certs file */
+  private boolean checkUser(String userID, X509Certificate cert) {
+    Set<BigInteger> numSet = certsMap.get(userID);
+    if (numSet == null) {
+      LOG.info("User " + userID + " is not configured in the user certs file");
+      return false;
+    }
+    if (!numSet.contains(cert.getSerialNumber())) {
+      LOG.info("Cert with serial number " + cert.getSerialNumber()
+          + " is not listed for user " + userID);
+      return false;
+    }
+    return true;
+  }
+
+  /** check that the requested path is listed in the user permissions file */
+  private boolean checkPath(String userID, X509Certificate cert, String pathInfo) {
+    if (!checkUser(userID, cert)) {
+      return false;
+    }
+
+    Set<Path> pathSet = permsMap.get(userID);
+    if (pathSet == null) {
+      LOG.info("User " + userID
+              + " is not listed in the user permissions file");
+      return false;
+    }
+    if (pathInfo == null || pathInfo.length() == 0) {
+      LOG.info("Can't get file path from HTTPS request; user is " + userID);
+      return false;
+    }
+
+    Path userPath = new Path(pathInfo);
+    while (userPath != null) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("\n Checking file path " + userPath);
+      }
+      if (pathSet.contains(userPath))
+        return true;
+      userPath = userPath.getParent();
+    }
+    LOG.info("User " + userID + " is not authorized to access " + pathInfo);
+    return false;
+  }
+}
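
The authorization walk in checkPath climbs from the requested path toward the root and succeeds as soon as an ancestor appears in the user's permitted set. A minimal standalone sketch of that walk, with a hypothetical permissions entry and request path:

    // Sketch of the ancestor walk only, using a hypothetical permitted prefix.
    Set<Path> permitted = new HashSet<Path>();
    permitted.add(new Path("/data/public"));          // as it would appear in user-permissions.xml
    Path p = new Path("/data/public/2008/part-0");    // requested file path
    boolean authorized = false;
    while (p != null && !authorized) {
      authorized = permitted.contains(p);             // match on the path or any ancestor
      p = p.getParent();
    }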

+ 252 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyHttpServer.java

@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.Map;
+
+import javax.servlet.http.HttpServlet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+
+import org.mortbay.http.SocketListener;
+import org.mortbay.http.SslListener;
+import org.mortbay.jetty.servlet.Dispatcher;
+import org.mortbay.jetty.servlet.FilterHolder;
+import org.mortbay.jetty.servlet.WebApplicationContext;
+import org.mortbay.jetty.servlet.WebApplicationHandler;
+
+/**
+ * Create a Jetty embedded server to answer http/https requests.
+ */
+public class ProxyHttpServer {
+  public static final Log LOG = LogFactory.getLog(ProxyHttpServer.class);
+
+  protected final org.mortbay.jetty.Server webServer;
+  protected final WebApplicationContext webAppContext;
+  protected SslListener sslListener;
+  protected SocketListener listener;
+  protected boolean findPort;
+
+  /**
+   * Create an embedded Jetty server. HTTP/SSL listeners and servlets are
+   * added separately via addListener, addSslListener and addServlet.
+   */
+  public ProxyHttpServer() throws IOException {
+    webServer = new org.mortbay.jetty.Server();
+    webAppContext = webServer.addWebApplication("/", "/");
+  }
+
+  /**
+   * Add a servlet to the server.
+   * 
+   * @param name
+   *            The name of the servlet (can be passed as null)
+   * @param pathSpec
+   *            The path spec for the servlet
+   * @param clazz
+   *            The servlet class
+   */
+  public void addServlet(String name, String pathSpec,
+      Class<? extends HttpServlet> clazz) {
+    try {
+      if (name == null) {
+        webAppContext.addServlet(pathSpec, clazz.getName());
+      } else {
+        webAppContext.addServlet(name, pathSpec, clazz.getName());
+      }
+    } catch (ClassNotFoundException cnfe) {
+      throw new RuntimeException("Problem instantiating class", cnfe);
+    } catch (InstantiationException ie) {
+      throw new RuntimeException("Problem instantiating class", ie);
+    } catch (IllegalAccessException iae) {
+      throw new RuntimeException("Problem instantiating class", iae);
+    }
+  }
+
+  /** add a global filter */
+  public void addGlobalFilter(String name, String classname,
+      Map<String, String> parameters) {
+    final String[] ALL_URLS = { "/*" };
+    defineFilter(webAppContext, name, classname, parameters, ALL_URLS);
+    LOG.info("Added global filter " + name + " (class=" + classname + ")");
+  }
+
+  /**
+   * Define a filter for a context and set up default url mappings.
+   */
+  protected void defineFilter(WebApplicationContext ctx, String name,
+      String classname, Map<String, String> parameters, String[] urls) {
+    WebApplicationHandler handler = ctx.getWebApplicationHandler();
+    FilterHolder holder = handler.defineFilter(name, classname);
+    if (parameters != null) {
+      for (Map.Entry<String, String> e : parameters.entrySet()) {
+        holder.setInitParameter(e.getKey(), e.getValue());
+      }
+    }
+    for (String url : urls) {
+      handler.addFilterPathMapping(url, name, Dispatcher.__ALL);
+    }
+  }
+
+  /**
+   * Set a value in the webapp context.
+   * 
+   * @param name
+   *            The name of the attribute
+   * @param value
+   *            The value of the attribute
+   */
+  public void setAttribute(String name, Object value) {
+    webAppContext.setAttribute(name, value);
+  }
+
+  /**
+   * Get the value in the webapp context.
+   * 
+   * @param name
+   *            The name of the attribute
+   * @return The value of the attribute
+   */
+  public Object getAttribute(String name) {
+    return webAppContext.getAttribute(name);
+  }
+
+  /** return the http port that the server is on */
+  public int getPort() throws IOException {
+    if (listener == null)
+      throw new IOException("No http listener found");
+    return listener.getPort();
+  }
+
+  public void setThreads(int min, int max) {
+    sslListener.setMinThreads(min);
+    sslListener.setMaxThreads(max);
+  }
+
+  /**
+   * Configure an http listener on the server
+   * 
+   * @param addr
+   *            address to listen on
+   * @param findPort
+   *            whether the listener should bind the given port and increment by
+   *            1 until it finds a free port
+   */
+  public void addListener(InetSocketAddress addr, boolean findPort)
+      throws IOException {
+    if (listener != null || webServer.isStarted()) {
+      throw new IOException("Failed to add listener");
+    }
+    this.findPort = findPort;
+    listener = new SocketListener();
+    listener.setHost(addr.getHostName());
+    listener.setPort(addr.getPort());
+    webServer.addListener(listener);
+  }
+
+  /**
+   * Configure an ssl listener on the server.
+   * 
+   * @param addr
+   *            address to listen on
+   * @param sslConf
+   *            conf to retrieve SSL properties from
+   */
+  public void addSslListener(InetSocketAddress addr, Configuration sslConf)
+      throws IOException {
+    if (sslListener != null || webServer.isStarted()) {
+      throw new IOException("Failed to add ssl listener");
+    }
+    sslListener = new SslListener();
+    sslListener.setHost(addr.getHostName());
+    sslListener.setPort(addr.getPort());
+    sslListener.setKeystore(sslConf.get("ssl.server.keystore.location"));
+    sslListener.setPassword(sslConf.get("ssl.server.keystore.password", ""));
+    sslListener.setKeyPassword(sslConf.get("ssl.server.keystore.keypassword",
+        ""));
+    sslListener.setKeystoreType(sslConf.get("ssl.server.keystore.type", "jks"));
+    sslListener.setNeedClientAuth(true);
+    webServer.addListener(sslListener);
+    System.setProperty("javax.net.ssl.trustStore", sslConf
+        .get("ssl.server.truststore.location"));
+    System.setProperty("javax.net.ssl.trustStorePassword", sslConf.get(
+        "ssl.server.truststore.password", ""));
+    System.setProperty("javax.net.ssl.trustStoreType", sslConf.get(
+        "ssl.server.truststore.type", "jks"));
+  }
+
+  /**
+   * Start the server. Does not wait for the server to start.
+   */
+  public void start() throws IOException {
+    try {
+      while (true) {
+        try {
+          webServer.start();
+          break;
+        } catch (org.mortbay.util.MultiException ex) {
+          // if the multi exception contains ONLY a bind exception,
+          // then try the next port number.
+          boolean needNewPort = false;
+          if (ex.size() == 1) {
+            Exception sub = ex.getException(0);
+            if (sub instanceof java.net.BindException) {
+              if (!findPort || listener == null)
+                throw sub; // java.net.BindException
+              needNewPort = true;
+            }
+          }
+          if (!needNewPort)
+            throw ex;
+          listener.setPort(listener.getPort() + 1);
+        }
+      }
+    } catch (IOException ie) {
+      throw ie;
+    } catch (Exception e) {
+      IOException ie = new IOException("Problem starting http server");
+      ie.initCause(e);
+      throw ie;
+    }
+  }
+
+  /**
+   * stop the server
+   */
+  public void stop() throws InterruptedException {
+    webServer.stop();
+  }
+
+  /**
+   * wait for the server
+   */
+  public void join() throws InterruptedException {
+    webServer.join();
+  }
+}
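
A small sketch of the testing-only HTTP path (no client certificates), showing how findPort lets start() step past ports that are already bound; the port number is a placeholder:

    // Illustrative wiring for tests; production use goes through addSslListener.
    ProxyHttpServer server = new ProxyHttpServer();
    server.addGlobalFilter("ProxyFilter", ProxyFilter.class.getName(), null);
    server.addServlet("listPaths", "/listPaths/*", ProxyListPathsServlet.class);
    server.addListener(new InetSocketAddress("localhost", 50480), true); // findPort=true
    server.start();                 // retries on BindException, incrementing the port
    int boundPort = server.getPort();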

+ 35 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyListPathsServlet.java

@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfsproxy;
+
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.hadoop.hdfs.server.namenode.ListPathsServlet;
+import org.apache.hadoop.security.UnixUserGroupInformation;
+
+/** {@inheritDoc} */
+public class ProxyListPathsServlet extends ListPathsServlet {
+  /** For java.io.Serializable */
+  private static final long serialVersionUID = 1L;
+
+  /** {@inheritDoc} */
+  @Override
+  protected UnixUserGroupInformation getUGI(HttpServletRequest request) {
+    return (UnixUserGroupInformation) request.getAttribute("authorized.ugi");
+  }
+}

+ 55 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyStreamFile.java

@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+
+import javax.servlet.ServletContext;
+import javax.servlet.http.HttpServletRequest;
+
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.server.namenode.StreamFile;
+import org.apache.hadoop.security.UnixUserGroupInformation;
+import org.apache.hadoop.conf.Configuration;
+
+/** {@inheritDoc} */
+public class ProxyStreamFile extends StreamFile {
+  /** For java.io.Serializable */
+  private static final long serialVersionUID = 1L;
+
+  /** {@inheritDoc} */
+  @Override
+  protected DFSClient getDFSClient(HttpServletRequest request)
+      throws IOException {
+    ServletContext context = getServletContext();
+    Configuration conf = new Configuration((Configuration) context
+        .getAttribute("name.conf"));
+    UnixUserGroupInformation.saveToConf(conf,
+        UnixUserGroupInformation.UGI_PROPERTY_NAME, getUGI(request));
+    InetSocketAddress nameNodeAddr = (InetSocketAddress) context
+        .getAttribute("name.node.address");
+    return new DFSClient(nameNodeAddr, conf);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected UnixUserGroupInformation getUGI(HttpServletRequest request) {
+    return (UnixUserGroupInformation) request.getAttribute("authorized.ugi");
+  }
+}

+ 152 - 0
src/contrib/hdfsproxy/src/java/org/apache/hadoop/hdfsproxy/ProxyUgiManager.java

@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UnixUserGroupInformation;
+import org.apache.hadoop.util.Shell;
+
+/** An ugi manager that maintains a temporary ugi cache */
+public class ProxyUgiManager {
+  private static final Map<String, CachedUgi> ugiCache = new HashMap<String, CachedUgi>();
+  private static long ugiLifetime;
+  /** username can only comprise of 0-9a-zA-Z and underscore, i.e. \w */
+  private static final Pattern USERNAME_PATTERN = Pattern.compile("^\\w+$");
+  static final int CLEANUP_THRESHOLD = 1000;
+
+  static {
+    Configuration conf = new Configuration(false);
+    conf.addResource("hdfsproxy-default.xml");
+    ugiLifetime = conf.getLong("hdfsproxy.ugi.cache.ugi.lifetime", 15) * 60 * 1000L;
+  }
+
+  /**
+   * retrieve an ugi for a user. try the cache first, if not found, get it by
+   * running a shell command
+   */
+  public static synchronized UnixUserGroupInformation getUgiForUser(
+      String userName) {
+    long now = System.currentTimeMillis();
+    long cutoffTime = now - ugiLifetime;
+    CachedUgi cachedUgi = ugiCache.get(userName);
+    if (cachedUgi != null && cachedUgi.getInitTime() > cutoffTime)
+      return cachedUgi.getUgi();
+    UnixUserGroupInformation ugi = null;
+    try {
+      ugi = getUgi(userName);
+    } catch (IOException e) {
+      return null;
+    }
+    if (ugiCache.size() > CLEANUP_THRESHOLD) { // remove expired ugi's first
+      for (Iterator<Map.Entry<String, CachedUgi>> it = ugiCache.entrySet()
+          .iterator(); it.hasNext();) {
+        Map.Entry<String, CachedUgi> e = it.next();
+        if (e.getValue().getInitTime() < cutoffTime) {
+          it.remove();
+        }
+      }
+    }
+    ugiCache.put(ugi.getUserName(), new CachedUgi(ugi, now));
+    return ugi;
+  }
+
+  /** clear the ugi cache */
+  public static synchronized void clearCache() {
+    ugiCache.clear();
+  }
+
+  /** set ugi lifetime, only for junit testing purposes */
+  static synchronized void setUgiLifetime(long lifetime) {
+    ugiLifetime = lifetime;
+  }
+
+  /** save an ugi to cache, only for junit testing purposes */
+  static synchronized void saveToCache(UnixUserGroupInformation ugi) {
+    ugiCache.put(ugi.getUserName(), new CachedUgi(ugi, System
+        .currentTimeMillis()));
+  }
+
+  /** get cache size, only for junit testing purposes */
+  static synchronized int getCacheSize() {
+    return ugiCache.size();
+  }
+
+  /**
+   * Get the ugi for a user by running shell command "id -Gn"
+   * 
+   * @param userName name of the user
+   * @return ugi of the user
+   * @throws IOException if encounter any error while running the command
+   */
+  private static UnixUserGroupInformation getUgi(String userName)
+      throws IOException {
+    if (userName == null || !USERNAME_PATTERN.matcher(userName).matches())
+      throw new IOException("Invalid username=" + userName);
+    String[] cmd = new String[] { "bash", "-c", "id -Gn '" + userName + "'"};
+    String[] groups = Shell.execCommand(cmd).split("\\s+");
+    return new UnixUserGroupInformation(userName, groups);
+  }
+
+  /** cached ugi object with its associated init time */
+  private static class CachedUgi {
+    final UnixUserGroupInformation ugi;
+    final long initTime;
+
+    CachedUgi(UnixUserGroupInformation ugi, long initTime) {
+      this.ugi = ugi;
+      this.initTime = initTime;
+    }
+
+    UnixUserGroupInformation getUgi() {
+      return ugi;
+    }
+
+    long getInitTime() {
+      return initTime;
+    }
+
+    /** {@inheritDoc} */
+    public int hashCode() {
+      return ugi.hashCode();
+    }
+
+    static boolean isEqual(Object a, Object b) {
+      return a == b || (a != null && a.equals(b));
+    }
+
+    /** {@inheritDoc} */
+    public boolean equals(Object obj) {
+      if (obj == this) {
+        return true;
+      }
+      if (obj != null && obj instanceof CachedUgi) {
+        CachedUgi that = (CachedUgi) obj;
+        return isEqual(this.ugi, that.ugi) && this.initTime == that.initTime;
+      }
+      return false;
+    }
+
+  }
+}
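
A brief sketch of the lookup path (the account name is hypothetical and must resolve via "id -Gn" on the proxy host): the first call shells out, later calls within hdfsproxy.ugi.cache.ugi.lifetime minutes are served from the cache:

    // Illustrative only; getUgiForUser returns null if the shell command fails
    // or the name does not match ^\w+$.
    UnixUserGroupInformation ugi = ProxyUgiManager.getUgiForUser("alice");
    if (ugi != null) {
      System.out.println("user=" + ugi.getUserName()
          + " groups=" + java.util.Arrays.asList(ugi.getGroupNames()));
    }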

+ 262 - 0
src/contrib/hdfsproxy/src/test/org/apache/hadoop/hdfsproxy/TestHdfsProxy.java

@@ -0,0 +1,262 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfsproxy;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URI;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.log4j.Level;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.tools.DistCp;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A JUnit test for HdfsProxy
+ */
+public class TestHdfsProxy extends TestCase {
+  {
+    ((Log4JLogger) LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"))
+        .getLogger().setLevel(Level.OFF);
+    ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.OFF);
+    ((Log4JLogger) FSNamesystem.LOG).getLogger().setLevel(Level.OFF);
+    ((Log4JLogger) DistCp.LOG).getLogger().setLevel(Level.ALL);
+  }
+
+  static final URI LOCAL_FS = URI.create("file:///");
+
+  private static final int NFILES = 10;
+  private static String TEST_ROOT_DIR = new Path(System.getProperty(
+      "test.build.data", "/tmp")).toString().replace(' ', '+');
+
+  /**
+   * class MyFile contains enough information to recreate the contents of a
+   * single file.
+   */
+  private static class MyFile {
+    private static Random gen = new Random();
+    private static final int MAX_LEVELS = 3;
+    private static final int MAX_SIZE = 8 * 1024;
+    private static String[] dirNames = { "zero", "one", "two", "three", "four",
+        "five", "six", "seven", "eight", "nine" };
+    private final String name;
+    private int size = 0;
+    private long seed = 0L;
+
+    MyFile() {
+      this(gen.nextInt(MAX_LEVELS));
+    }
+
+    MyFile(int nLevels) {
+      String xname = "";
+      if (nLevels != 0) {
+        int[] levels = new int[nLevels];
+        for (int idx = 0; idx < nLevels; idx++) {
+          levels[idx] = gen.nextInt(10);
+        }
+        StringBuffer sb = new StringBuffer();
+        for (int idx = 0; idx < nLevels; idx++) {
+          sb.append(dirNames[levels[idx]]);
+          sb.append("/");
+        }
+        xname = sb.toString();
+      }
+      long fidx = gen.nextLong() & Long.MAX_VALUE;
+      name = xname + Long.toString(fidx);
+      reset();
+    }
+
+    void reset() {
+      final int oldsize = size;
+      do {
+        size = gen.nextInt(MAX_SIZE);
+      } while (oldsize == size);
+      final long oldseed = seed;
+      do {
+        seed = gen.nextLong() & Long.MAX_VALUE;
+      } while (oldseed == seed);
+    }
+
+    String getName() {
+      return name;
+    }
+
+    int getSize() {
+      return size;
+    }
+
+    long getSeed() {
+      return seed;
+    }
+  }
+
+  private static MyFile[] createFiles(URI fsname, String topdir)
+      throws IOException {
+    return createFiles(FileSystem.get(fsname, new Configuration()), topdir);
+  }
+
+  /**
+   * Create NFILES files with random names and directory hierarchies, filled
+   * with random (but reproducible) data.
+   */
+  private static MyFile[] createFiles(FileSystem fs, String topdir)
+      throws IOException {
+    Path root = new Path(topdir);
+    MyFile[] files = new MyFile[NFILES];
+    for (int i = 0; i < NFILES; i++) {
+      files[i] = createFile(root, fs);
+    }
+    return files;
+  }
+
+  private static MyFile createFile(Path root, FileSystem fs, int levels)
+      throws IOException {
+    MyFile f = levels < 0 ? new MyFile() : new MyFile(levels);
+    Path p = new Path(root, f.getName());
+    FSDataOutputStream out = fs.create(p);
+    byte[] toWrite = new byte[f.getSize()];
+    new Random(f.getSeed()).nextBytes(toWrite);
+    out.write(toWrite);
+    out.close();
+    FileSystem.LOG.info("created: " + p + ", size=" + f.getSize());
+    return f;
+  }
+
+  private static MyFile createFile(Path root, FileSystem fs) throws IOException {
+    return createFile(root, fs, -1);
+  }
+
+  private static boolean checkFiles(FileSystem fs, String topdir, MyFile[] files)
+      throws IOException {
+    return checkFiles(fs, topdir, files, false);
+  }
+
+  private static boolean checkFiles(FileSystem fs, String topdir,
+      MyFile[] files, boolean existingOnly) throws IOException {
+    Path root = new Path(topdir);
+
+    for (int idx = 0; idx < files.length; idx++) {
+      Path fPath = new Path(root, files[idx].getName());
+      try {
+        fs.getFileStatus(fPath);
+        FSDataInputStream in = fs.open(fPath);
+        byte[] toRead = new byte[files[idx].getSize()];
+        byte[] toCompare = new byte[files[idx].getSize()];
+        Random rb = new Random(files[idx].getSeed());
+        rb.nextBytes(toCompare);
+        assertEquals("Cannnot read file.", toRead.length, in.read(toRead));
+        in.close();
+        for (int i = 0; i < toRead.length; i++) {
+          if (toRead[i] != toCompare[i]) {
+            return false;
+          }
+        }
+        toRead = null;
+        toCompare = null;
+      } catch (FileNotFoundException fnfe) {
+        if (!existingOnly) {
+          throw fnfe;
+        }
+      }
+    }
+
+    return true;
+  }
+
+  /** delete directory and everything underneath it. */
+  private static void deldir(FileSystem fs, String topdir) throws IOException {
+    fs.delete(new Path(topdir), true);
+  }
+
+  /** verify hdfsproxy implements the hftp interface */
+  public void testHdfsProxyInterface() throws Exception {
+    MiniDFSCluster cluster = null;
+    HdfsProxy proxy = null;
+    try {
+
+      final Configuration dfsConf = new Configuration();
+      cluster = new MiniDFSCluster(dfsConf, 2, true, null);
+      cluster.waitActive();
+
+      final DistCp distcp = new DistCp(dfsConf);
+      final FileSystem localfs = FileSystem.get(LOCAL_FS, dfsConf);
+      final FileSystem hdfs = cluster.getFileSystem();
+      final Configuration proxyConf = new Configuration(false);
+      proxyConf.set("hdfsproxy.dfs.namenode.address", hdfs.getUri().getHost() + ":"
+          + hdfs.getUri().getPort());
+      proxyConf.set("hdfsproxy.https.address", "127.0.0.1:0");
+      final String namenode = hdfs.getUri().toString();
+      if (namenode.startsWith("hdfs://")) {
+        MyFile[] files = createFiles(LOCAL_FS, TEST_ROOT_DIR + "/srcdat");
+        ToolRunner.run(distcp, new String[] { "-log", namenode + "/logs",
+            "file:///" + TEST_ROOT_DIR + "/srcdat", namenode + "/destdat" });
+        assertTrue("Source and destination directories do not match.",
+            checkFiles(hdfs, "/destdat", files));
+        assertTrue("Log directory does not exist.", hdfs.exists(new Path(
+            namenode + "/logs")));
+
+        proxy = new HdfsProxy(proxyConf);
+        InetSocketAddress proxyAddr = NetUtils.createSocketAddr("127.0.0.1:0");
+        proxy.addListener(proxyAddr, true);
+        proxy.start();
+        final String realProxyAddr = proxyAddr.getHostName() + ":"
+            + proxy.getPort();
+
+        ToolRunner.run(distcp, new String[] {
+            "hftp://" + realProxyAddr + "/destdat", namenode + "/copied1" });
+        assertTrue("Source and copied directories do not match.", checkFiles(
+            hdfs, "/copied1", files));
+
+        ToolRunner.run(distcp, new String[] {
+            "hftp://" + realProxyAddr + "/destdat",
+            "file:///" + TEST_ROOT_DIR + "/copied2" });
+        assertTrue("Source and copied directories do not match.", checkFiles(
+            localfs, TEST_ROOT_DIR + "/copied2", files));
+
+        deldir(hdfs, "/destdat");
+        deldir(hdfs, "/logs");
+        deldir(hdfs, "/copied1");
+        deldir(localfs, TEST_ROOT_DIR + "/srcdat");
+        deldir(localfs, TEST_ROOT_DIR + "/copied2");
+      }
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      if (proxy != null) {
+        proxy.stop();
+      }
+    }
+  }
+}
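
Note: the DistCp runs above exercise the proxy purely through the hftp:// scheme, which is what "implements the hftp interface" means for clients: point an ordinary FileSystem at the proxy's address instead of the NameNode's and use the read-only operations HftpFileSystem provides. A minimal client-side sketch of the same idea follows; the host, port, and paths are hypothetical placeholders, and the secure variant would use hsftp:// plus the SSL client setup HsftpFileSystem expects.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HftpThroughProxySketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical proxy endpoint; the secure variant would use "hsftp://"
        // plus the SSL client configuration HsftpFileSystem expects.
        FileSystem fs =
            FileSystem.get(URI.create("hftp://proxy.example.com:8443"), conf);

        // hftp is read-only: listing and reading are the operations on offer.
        for (FileStatus stat : fs.listStatus(new Path("/destdat"))) {
          System.out.println(stat.getPath());
        }
        FSDataInputStream in = fs.open(new Path("/destdat/part-00000"));
        byte[] buf = new byte[4096];
        int n = in.read(buf);
        System.out.println("read " + n + " bytes through the proxy");
        in.close();
      }
    }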

+ 107 - 0
src/contrib/hdfsproxy/src/test/org/apache/hadoop/hdfsproxy/TestProxyUgiManager.java

@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfsproxy;
+
+import org.apache.hadoop.security.UnixUserGroupInformation;
+
+import junit.framework.TestCase;
+
+/** Unit tests for ProxyUgiManager */
+public class TestProxyUgiManager extends TestCase {
+
+  private static final UnixUserGroupInformation root1Ugi = new UnixUserGroupInformation(
+      "root", new String[] { "group1" });
+  private static final UnixUserGroupInformation root2Ugi = new UnixUserGroupInformation(
+      "root", new String[] { "group2" });
+  private static final long ugiLifetime = 1000L; // milliseconds
+
+  /** Test caching functionality */
+  public void testCache() throws Exception {
+    ProxyUgiManager.saveToCache(root1Ugi);
+    UnixUserGroupInformation ugi = ProxyUgiManager.getUgiForUser(root1Ugi
+        .getUserName());
+    assertEquals(root1Ugi, ugi);
+    ProxyUgiManager.saveToCache(root2Ugi);
+    ugi = ProxyUgiManager.getUgiForUser(root2Ugi.getUserName());
+    assertEquals(root2Ugi, ugi);
+  }
+
+  /** Test clearCache method */
+  public void testClearCache() throws Exception {
+    UnixUserGroupInformation ugi = ProxyUgiManager.getUgiForUser(root1Ugi
+        .getUserName());
+    if (root1Ugi.equals(ugi)) {
+      ProxyUgiManager.saveToCache(root2Ugi);
+      ugi = ProxyUgiManager.getUgiForUser(root2Ugi.getUserName());
+      assertEquals(root2Ugi, ugi);
+      ProxyUgiManager.clearCache();
+      ugi = ProxyUgiManager.getUgiForUser(root2Ugi.getUserName());
+      assertFalse(root2Ugi.equals(ugi));
+    } else {
+      ProxyUgiManager.saveToCache(root1Ugi);
+      ugi = ProxyUgiManager.getUgiForUser(root1Ugi.getUserName());
+      assertEquals(root1Ugi, ugi);
+      ProxyUgiManager.clearCache();
+      ugi = ProxyUgiManager.getUgiForUser(root1Ugi.getUserName());
+      assertFalse(root1Ugi.equals(ugi));
+    }
+  }
+
+  /** Test cache timeout */
+  public void testTimeOut() throws Exception {
+    String[] users = new String[] { "root", "nobody", "SYSTEM",
+        "Administrator", "Administrators", "Guest" };
+    String realUser = null;
+    UnixUserGroupInformation ugi = null;
+    ProxyUgiManager.clearCache();
+    for (String user : users) {
+      ugi = ProxyUgiManager.getUgiForUser(user);
+      if (ugi != null) {
+        realUser = user;
+        break;
+      }
+    }
+    if (realUser != null) {
+      ProxyUgiManager.setUgiLifetime(ugiLifetime);
+      ProxyUgiManager.clearCache();
+      UnixUserGroupInformation[] fakedUgis = generateUgi(ProxyUgiManager.CLEANUP_THRESHOLD);
+      for (int i = 0; i < ProxyUgiManager.CLEANUP_THRESHOLD; i++) {
+        ProxyUgiManager.saveToCache(fakedUgis[i]);
+      }
+      assertTrue(ProxyUgiManager.getCacheSize() == ProxyUgiManager.CLEANUP_THRESHOLD);
+      Thread.sleep(ugiLifetime + 1000L);
+      UnixUserGroupInformation newugi = ProxyUgiManager.getUgiForUser(realUser);
+      assertTrue(ProxyUgiManager.getCacheSize() == ProxyUgiManager.CLEANUP_THRESHOLD + 1);
+      assertEquals(newugi, ugi);
+      Thread.sleep(ugiLifetime + 1000L);
+      newugi = ProxyUgiManager.getUgiForUser(realUser);
+      assertTrue(ProxyUgiManager.getCacheSize() == 1);
+      assertEquals(newugi, ugi);
+    }
+  }
+
+  private static UnixUserGroupInformation[] generateUgi(int size) {
+    UnixUserGroupInformation[] ugis = new UnixUserGroupInformation[size];
+    for (int i = 0; i < size; i++) {
+      ugis[i] = new UnixUserGroupInformation("user" + i,
+          new String[] { "group" });
+    }
+    return ugis;
+  }
+}
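
Note: the behaviour pinned down by the tests above is a cache keyed by user name whose entries carry their creation time, expire after a configurable lifetime, and are swept once the cache grows past CLEANUP_THRESHOLD. The real code is in ProxyUgiManager.java earlier in this patch; the fragment below is only a sketch of that general technique, with hypothetical class names and a hypothetical threshold value, to make the expected semantics concrete.

    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;

    /** Illustrative sketch only -- not the actual ProxyUgiManager. */
    class TimeBoundedCacheSketch<V> {
      private static class Entry<V> {
        final V value;
        final long initTime;
        Entry(V value, long initTime) {
          this.value = value;
          this.initTime = initTime;
        }
      }

      static final int CLEANUP_THRESHOLD = 1000;   // hypothetical constant
      private long lifetimeMs = 60 * 60 * 1000L;   // hypothetical default
      private final Map<String, Entry<V>> cache = new HashMap<String, Entry<V>>();

      synchronized void setLifetime(long ms) {
        lifetimeMs = ms;
      }

      /** Cache a value, sweeping expired entries once the threshold is reached. */
      synchronized void put(String key, V value) {
        if (cache.size() >= CLEANUP_THRESHOLD) {
          cleanup();
        }
        cache.put(key, new Entry<V>(value, System.currentTimeMillis()));
      }

      /** Return a cached value, or null if it is missing or has expired. */
      synchronized V get(String key) {
        Entry<V> e = cache.get(key);
        if (e == null || System.currentTimeMillis() - e.initTime > lifetimeMs) {
          return null;
        }
        return e.value;
      }

      /** Drop every entry older than the configured lifetime. */
      private void cleanup() {
        long now = System.currentTimeMillis();
        for (Iterator<Map.Entry<String, Entry<V>>> it = cache.entrySet().iterator();
             it.hasNext();) {
          if (now - it.next().getValue().initTime > lifetimeMs) {
            it.remove();
          }
        }
      }

      synchronized int size() {
        return cache.size();
      }
    }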

+ 19 - 2
src/hdfs/org/apache/hadoop/hdfs/HftpFileSystem.java

@@ -23,15 +23,19 @@ import java.io.InputStream;
 import java.io.IOException;
 
 import java.net.HttpURLConnection;
+import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.UnknownHostException;
 
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 
 import java.util.ArrayList;
+import java.util.Random;
+
 import javax.security.auth.login.LoginException;
 
 import org.xml.sax.Attributes;
@@ -71,6 +75,7 @@ public class HftpFileSystem extends FileSystem {
 
   protected InetSocketAddress nnAddr;
   protected UserGroupInformation ugi; 
+  protected final Random ran = new Random();
 
   protected static final SimpleDateFormat df = ListPathsServlet.df;
 
@@ -85,14 +90,26 @@ public class HftpFileSystem extends FileSystem {
 
     nnAddr = NetUtils.createSocketAddr(name.toString());
   }
+  
+  /** randomly pick one from all available IP addresses of a given hostname */
+  protected String pickOneAddress(String hostname) throws UnknownHostException {
+    if ("localhost".equals(hostname))
+      return hostname;
+    InetAddress[] addrs = InetAddress.getAllByName(hostname);
+    if (addrs.length > 1)
+      return addrs[ran.nextInt(addrs.length)].getHostAddress();
+    return addrs[0].getHostAddress();
+  }
 
   @Override
   public URI getUri() {
     try {
-      return new URI("hftp", null, nnAddr.getHostName(), nnAddr.getPort(),
+      return new URI("hftp", null, pickOneAddress(nnAddr.getHostName()), nnAddr.getPort(),
                      null, null, null);
     } catch (URISyntaxException e) {
       return null;
+    } catch (UnknownHostException e) {
+      return null;
     }
   }
 
@@ -104,7 +121,7 @@ public class HftpFileSystem extends FileSystem {
   protected HttpURLConnection openConnection(String path, String query)
       throws IOException {
     try {
-      final URL url = new URI("http", null, nnAddr.getHostName(),
+      final URL url = new URI("http", null, pickOneAddress(nnAddr.getHostName()),
           nnAddr.getPort(), path, query, null).toURL();
       if (LOG.isTraceEnabled()) {
         LOG.trace("url=" + url);
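
Note: the pickOneAddress() method added above is the client half of proxy load balancing: when the proxy hostname resolves to several A records, each connection is opened against a randomly chosen address instead of whichever one the resolver returns first. A standalone illustration of the same lookup, assuming a hypothetical multi-homed hostname (not part of the patch):

    import java.net.InetAddress;
    import java.net.UnknownHostException;
    import java.util.Random;

    public class PickAddressSketch {
      private static final Random ran = new Random();

      /** Mirrors the selection logic added to HftpFileSystem above. */
      static String pickOneAddress(String hostname) throws UnknownHostException {
        if ("localhost".equals(hostname)) {
          return hostname;
        }
        InetAddress[] addrs = InetAddress.getAllByName(hostname);
        return addrs[addrs.length > 1 ? ran.nextInt(addrs.length) : 0]
            .getHostAddress();
      }

      public static void main(String[] args) throws Exception {
        // Hypothetical multi-homed proxy name; with several A records the
        // printed address changes from call to call.
        for (int i = 0; i < 5; i++) {
          System.out.println(pickOneAddress("proxy.example.com"));
        }
      }
    }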

+ 5 - 2
src/hdfs/org/apache/hadoop/hdfs/HsftpFileSystem.java

@@ -24,6 +24,7 @@ import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.UnknownHostException;
 
 import org.apache.hadoop.fs.Path;
 
@@ -39,7 +40,7 @@ public class HsftpFileSystem extends HftpFileSystem {
   protected HttpURLConnection openConnection(String path, String query)
       throws IOException {
     try {
-      final URL url = new URI("https", null, nnAddr.getHostName(),
+      final URL url = new URI("https", null, pickOneAddress(nnAddr.getHostName()),
           nnAddr.getPort(), path, query, null).toURL();
       return (HttpURLConnection)url.openConnection();
     } catch (URISyntaxException e) {
@@ -50,10 +51,12 @@ public class HsftpFileSystem extends HftpFileSystem {
   @Override
   public URI getUri() {
     try {
-      return new URI("hsftp", null, nnAddr.getHostName(), nnAddr.getPort(),
+      return new URI("hsftp", null, pickOneAddress(nnAddr.getHostName()), nnAddr.getPort(),
                      null, null, null);
     } catch (URISyntaxException e) {
       return null;
+    } catch (UnknownHostException e) {
+      return null;
     }
   }
 

+ 3 - 2
src/hdfs/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.IOException;
+import java.net.InetSocketAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
 
@@ -63,12 +64,12 @@ abstract class DfsServlet extends HttpServlet {
   protected ClientProtocol createNameNodeProxy(UnixUserGroupInformation ugi
       ) throws IOException {
     ServletContext context = getServletContext();
-    NameNode nn = (NameNode)context.getAttribute("name.node");
+    InetSocketAddress nnAddr = (InetSocketAddress)context.getAttribute("name.node.address");
     Configuration conf = new Configuration(
         (Configuration)context.getAttribute("name.conf"));
     UnixUserGroupInformation.saveToConf(conf,
         UnixUserGroupInformation.UGI_PROPERTY_NAME, ugi);
-    return DFSClient.createNamenode(nn.getNameNodeAddress(), conf);
+    return DFSClient.createNamenode(nnAddr, conf);
   }
 
   /** Create a URI for redirecting request */

+ 1 - 0
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -368,6 +368,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean {
     this.infoServer.setAttribute("datanode.https.port",
         datanodeSslPort.getPort());
     this.infoServer.setAttribute("name.node", nn);
+    this.infoServer.setAttribute("name.node.address", nn.getNameNodeAddress());
     this.infoServer.setAttribute("name.system.image", getFSImage());
     this.infoServer.setAttribute("name.conf", conf);
     this.infoServer.addInternalServlet("fsck", "/fsck", FsckServlet.class);

+ 11 - 4
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FileDataServlet.java

@@ -35,9 +35,11 @@ import org.apache.hadoop.security.UnixUserGroupInformation;
  */
 public class FileDataServlet extends DfsServlet {
 
-  private URI createUri(FileStatus i, UnixUserGroupInformation ugi,
-      ClientProtocol nnproxy, String scheme)
+  /** Create a redirection URI */
+  protected URI createUri(FileStatus i, UnixUserGroupInformation ugi,
+      ClientProtocol nnproxy, HttpServletRequest request)
       throws IOException, URISyntaxException {
+    String scheme = request.getScheme();
     final DatanodeID host = pickSrcDatanode(i, nnproxy);
     final String hostname;
     if (host instanceof DatanodeInfo) {
@@ -52,7 +54,7 @@ public class FileDataServlet extends DfsServlet {
         "/streamFile", "filename=" + i.getPath() + "&ugi=" + ugi, null);
   }
 
-  private final static JspHelper jspHelper = new JspHelper();
+  private static JspHelper jspHelper = null;
 
   /** Select a datanode to service this request.
    * Currently, this looks at no more than the first five blocks of a file,
@@ -60,6 +62,11 @@ public class FileDataServlet extends DfsServlet {
    */
   private static DatanodeID pickSrcDatanode(FileStatus i,
       ClientProtocol nnproxy) throws IOException {
+    // Lazily initializing a static member this way is subject to a race
+    // condition. A proper fix would make JspHelper a singleton; since the
+    // race does not affect correctness, it is left as is for now.
+    if (jspHelper == null)
+      jspHelper = new JspHelper();
     final LocatedBlocks blks = nnproxy.getBlockLocations(
         i.getPath().toUri().getPath(), 0, 1);
     if (i.getLen() == 0 || blks.getLocatedBlocks().size() <= 0) {
@@ -87,7 +94,7 @@ public class FileDataServlet extends DfsServlet {
       FileStatus info = nnproxy.getFileInfo(path);
       if ((info != null) && !info.isDir()) {
         response.sendRedirect(createUri(info, ugi, nnproxy,
-              request.getScheme()).toURL().toString());
+              request).toURL().toString());
       } else if (info == null){
         response.sendError(400, "cat: File not found " + path);
       } else {
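
Note: the in-code comment above flags the lazy initialization of the static jspHelper field as a benign race and suggests a singleton as the proper fix. One standard way to get a lazily created, thread-safe singleton without synchronization is the initialization-on-demand holder idiom; a sketch under that assumption, where the nested class below is hypothetical and merely stands in for the real JspHelper:

    /** Illustrative only: a lazily initialized, thread-safe singleton holder. */
    public class JspHelperHolderSketch {

      /** Stand-in for org.apache.hadoop.hdfs.server.namenode.JspHelper. */
      static class JspHelper {
        JspHelper() {
          // expensive setup would go here
        }
      }

      /** The JVM runs Holder's initialization, and hence creates INSTANCE,
       *  only on first access, and class initialization is thread safe. */
      private static class Holder {
        static final JspHelper INSTANCE = new JspHelper();
      }

      static JspHelper getJspHelper() {
        return Holder.INSTANCE;
      }
    }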

+ 10 - 4
src/hdfs/org/apache/hadoop/hdfs/server/namenode/StreamFile.java

@@ -36,12 +36,18 @@ public class StreamFile extends DfsServlet {
       nameNodeAddr = datanode.getNameNodeAddr();
     }
   }
-  public void doGet(HttpServletRequest request, HttpServletResponse response)
-    throws ServletException, IOException {
+  
+  /** Get a DFSClient for connecting to DFS. */
+  protected DFSClient getDFSClient(HttpServletRequest request)
+      throws IOException {
     Configuration conf = new Configuration(masterConf);
     UnixUserGroupInformation.saveToConf(conf,
         UnixUserGroupInformation.UGI_PROPERTY_NAME, getUGI(request));
-
+    return new DFSClient(nameNodeAddr, conf);
+  }
+  
+  public void doGet(HttpServletRequest request, HttpServletResponse response)
+    throws ServletException, IOException {
     String filename = request.getParameter("filename");
     if (filename == null || filename.length() == 0) {
       response.setContentType("text/plain");
@@ -49,7 +55,7 @@ public class StreamFile extends DfsServlet {
       out.print("Invalid input");
       return;
     }
-    DFSClient dfs = new DFSClient(nameNodeAddr, conf);
+    DFSClient dfs = getDFSClient(request);
     FSInputStream in = dfs.open(filename);
     OutputStream os = response.getOutputStream();
     response.setHeader("Content-Disposition", "attachment; filename=\"" +