
Revert "Revert "Merge branch 'trunk' into HDFS-7240""
After testing, it was confirmed that these changes work as expected.

This reverts commit 7a542fb3270953fff039c9b1bd7ba7afa35a842c.

Anu Engineer, 7 years ago
commit b78c94f44c
100 changed files with 11055 additions and 107 deletions
  1. 11 21
      BUILDING.txt
  2. 1 0
      dev-support/bin/dist-layout-stitching
  3. 39 0
      dev-support/bin/win-vs-upgrade.cmd
  4. 3 0
      dev-support/docker/Dockerfile
  5. 49 0
      dev-support/win-paths-eg.cmd
  6. 0 15
      hadoop-common-project/hadoop-annotations/pom.xml
  7. 28 0
      hadoop-common-project/hadoop-common/pom.xml
  8. 4 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  9. 24 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java
  10. 2 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java
  11. 2 0
      hadoop-common-project/hadoop-common/src/main/native/native.vcxproj
  12. 33 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c
  13. 31 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
  14. 2 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java
  15. 31 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java
  16. 5 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
  17. 6 0
      hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java
  18. 1 1
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  19. 98 18
      hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml
  20. 7 10
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt
  21. 16 6
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.c
  22. 8 3
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.h
  23. 350 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c
  24. 59 11
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_threaded.c
  25. 49 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindCyrusSASL.cmake
  26. 44 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindGSasl.cmake
  27. 297 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt
  28. 161 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CONTRIBUTING.md
  29. 35 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/Doxyfile.in
  30. 5 13
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/mainpage.dox
  31. 20 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/CMakeLists.txt
  32. 20 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/CMakeLists.txt
  33. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/CMakeLists.txt
  34. 121 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/cat.c
  35. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/CMakeLists.txt
  36. 107 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/connect_cancel.c
  37. 24 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/CMakeLists.txt
  38. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/CMakeLists.txt
  39. 89 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/cat.cc
  40. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/CMakeLists.txt
  41. 154 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/connect_cancel.cc
  42. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/CMakeLists.txt
  43. 140 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/find.cc
  44. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/CMakeLists.txt
  45. 122 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/gendirs.cc
  46. 177 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/block_location.h
  47. 68 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/config_parser.h
  48. 48 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/content_summary.h
  49. 141 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/events.h
  50. 48 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/fsinfo.h
  51. 394 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfs_ext.h
  52. 492 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfspp.h
  53. 110 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/locks.h
  54. 60 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/log.h
  55. 136 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/options.h
  56. 59 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/statinfo.h
  57. 111 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/status.h
  58. 137 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h
  59. 25 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/CMakeLists.txt
  60. 19 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/CMakeLists.txt
  61. 21 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/CMakeLists.txt
  62. 2007 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc
  63. 24 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt
  64. 49 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/async_stream.h
  65. 18 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.cc
  66. 90 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.h
  67. 37 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.cc
  68. 40 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.h
  69. 219 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/config_parser.cc
  70. 169 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.cc
  71. 108 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.h
  72. 328 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.cc
  73. 138 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.h
  74. 122 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader_impl.h
  75. 55 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/content_summary.cc
  76. 65 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/asio.h
  77. 137 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/continuation.h
  78. 129 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/protobuf.h
  79. 61 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/fsinfo.cc
  80. 210 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.cc
  81. 70 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.h
  82. 146 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.cc
  83. 79 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.h
  84. 89 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.cc
  85. 59 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.h
  86. 100 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/locks.cc
  87. 227 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.cc
  88. 217 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.h
  89. 178 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.cc
  90. 49 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.h
  91. 52 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/new_delete.h
  92. 43 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/optional_wrapper.h
  93. 61 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/options.cc
  94. 87 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc
  95. 160 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.h
  96. 66 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_authenticator.h
  97. 240 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_digest_md5.cc
  98. 74 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/statinfo.cc
  99. 192 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/status.cc
  100. 454 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/uri.cc

+ 11 - 21
BUILDING.txt

@@ -11,6 +11,8 @@ Requirements:
 * Zlib devel (if compiling native code)
 * openssl devel (if compiling native hadoop-pipes and to get the best HDFS encryption performance)
 * Linux FUSE (Filesystem in Userspace) version 2.6 or above (if compiling fuse_dfs)
+* Jansson C XML parsing library ( if compiling libwebhdfs )
+* Doxygen ( if compiling libhdfspp and generating the documents )
 * Internet connection for first build (to fetch all Maven and Hadoop dependencies)
 * python (for releasedocs)
 * bats (for shell code testing)
@@ -348,7 +350,7 @@ Requirements:
 * Maven 3.0 or later
 * ProtocolBuffer 2.5.0
 * CMake 3.1 or newer
-* Windows SDK 7.1 or Visual Studio 2010 Professional
+* Visual Studio 2010 Professional or Higher
 * Windows SDK 8.1 (if building CPU rate control for the container executor)
 * zlib headers (if building native code bindings for zlib)
 * Internet connection for first build (to fetch all Maven and Hadoop dependencies)
@@ -359,18 +361,15 @@ Requirements:
 Unix command-line tools are also included with the Windows Git package which
 can be downloaded from http://git-scm.com/downloads
 
-If using Visual Studio, it must be Visual Studio 2010 Professional (not 2012).
+If using Visual Studio, it must be Professional level or higher.
 Do not use Visual Studio Express.  It does not support compiling for 64-bit,
-which is problematic if running a 64-bit system.  The Windows SDK 7.1 is free to
-download here:
-
-http://www.microsoft.com/en-us/download/details.aspx?id=8279
+which is problematic if running a 64-bit system.
 
 The Windows SDK 8.1 is available to download at:
 
 http://msdn.microsoft.com/en-us/windows/bg162891.aspx
 
-Cygwin is neither required nor supported.
+Cygwin is not required.
 
 ----------------------------------------------------------------------------------
 Building:
@@ -378,21 +377,12 @@ Building:
 Keep the source code tree in a short path to avoid running into problems related
 to Windows maximum path length limitation (for example, C:\hdc).
 
-Run builds from a Windows SDK Command Prompt. (Start, All Programs,
-Microsoft Windows SDK v7.1, Windows SDK 7.1 Command Prompt).
-
-JAVA_HOME must be set, and the path must not contain spaces. If the full path
-would contain spaces, then use the Windows short path instead.
-
-You must set the Platform environment variable to either x64 or Win32 depending
-on whether you're running a 64-bit or 32-bit system. Note that this is
-case-sensitive. It must be "Platform", not "PLATFORM" or "platform".
-Environment variables on Windows are usually case-insensitive, but Maven treats
-them as case-sensitive. Failure to set this environment variable correctly will
-cause msbuild to fail while building the native code in hadoop-common.
+There is one support command file located in dev-support called win-paths-eg.cmd.
+It should be copied somewhere convenient and modified to fit your needs.
 
-set Platform=x64 (when building on a 64-bit system)
-set Platform=Win32 (when building on a 32-bit system)
+win-paths-eg.cmd sets up the environment for use. You will need to modify this
+file. It will put all of the required components in the command path,
+configure the bit-ness of the build, and set several optional components.
 
 Several tests require that the user must have the Create Symbolic Links
 privilege.

+ 1 - 0
dev-support/bin/dist-layout-stitching

@@ -128,6 +128,7 @@ run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}"
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/hadoop-hdfs-native-client-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/hadoop-hdfs-native-client-${VERSION}" .
+run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-rbf/target/hadoop-hdfs-rbf-${VERSION}" .
 
 
 run copy "${ROOT}/hadoop-yarn-project/target/hadoop-yarn-project-${VERSION}" .
 run copy "${ROOT}/hadoop-yarn-project/target/hadoop-yarn-project-${VERSION}" .
 run copy "${ROOT}/hadoop-mapreduce-project/target/hadoop-mapreduce-${VERSION}" .
 run copy "${ROOT}/hadoop-mapreduce-project/target/hadoop-mapreduce-${VERSION}" .

+ 39 - 0
dev-support/bin/win-vs-upgrade.cmd

@@ -0,0 +1,39 @@
+@ECHO OFF
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM     http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+
+@WHERE devenv
+IF %ERRORLEVEL% NEQ 0 (
+  @ECHO "devenv command was not found. Verify your compiler installation level."
+  EXIT /b 1
+)
+
+@REM Need to save output to a file because for loop will just
+@REM loop forever... :(
+
+SET srcdir=%1
+SET workdir=%2
+
+IF EXIST %srcdir%\Backup (
+  @ECHO "Solution files already upgraded."
+  EXIT /b 0
+)
+
+CD %srcdir%
+DIR /B *.sln > %workdir%\HADOOP-SLN-UPGRADE.TXT
+
+FOR /F %%f IN (%workdir%\HADOOP-SLN-UPGRADE.TXT) DO (
+  devenv %%f /upgrade
+)

+ 3 - 0
dev-support/docker/Dockerfile

@@ -42,6 +42,7 @@ RUN apt-get -q update && apt-get -q install -y \
     apt-utils \
     build-essential \
     bzip2 \
+    clang \
     curl \
     doxygen \
     fuse \
@@ -54,6 +55,7 @@ RUN apt-get -q update && apt-get -q install -y \
     libfuse-dev \
     libprotobuf-dev \
     libprotoc-dev \
+    libsasl2-dev \
     libsnappy-dev \
     libssl-dev \
     libtool \
@@ -71,6 +73,7 @@ RUN apt-get -q update && apt-get -q install -y \
     software-properties-common \
     snappy \
     sudo \
+    valgrind \
     zlib1g-dev
 
 #######

+ 49 - 0
dev-support/win-paths-eg.cmd

@@ -0,0 +1,49 @@
+@ECHO OFF
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM     http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+
+@REM *************************************************
+@REM JDK and these settings MUST MATCH
+@REM
+@REM 64-bit : Platform = x64, VCVARSPLAT = amd64
+@REM
+@REM 32-bit : Platform = Win32, VCVARSPLAT = x86
+@REM
+
+SET Platform=x64
+SET VCVARSPLAT=amd64
+
+@REM ******************
+@REM Forcibly move the Maven local repo
+
+SET MAVEN_OPTS=-Dmaven.repo.local=C:\Tools\m2
+
+@REM *******************************************
+@REM
+@REM Locations of your bits and pieces
+@REM
+@REM NOTE: cmake is assumed to already be on the
+@REM command path
+@REM
+
+SET MAVEN_HOME=C:\Tools\apache-maven-3.5.0
+SET JAVA_HOME=C:\Tools\jdk
+SET MSVS=C:\Program Files (x86)\Microsoft Visual Studio 12.0
+SET PROTO_BIN=C:\Tools\protobuf-2.5.0
+SET GIT_HOME=C:\Program Files\Git
+
+SET PATH=%JAVA_HOME%\bin;%MAVEN_HOME%\bin;%PROTO_BIN%;%GIT_HOME%\bin;%PATH%
+
+CALL "%MSVS%\VC\vcvarsall.bat" %VCVARSPLAT%

+ 0 - 15
hadoop-common-project/hadoop-annotations/pom.xml

@@ -38,21 +38,6 @@
   </dependencies>
 
   <profiles>
-    <profile>
-      <id>jdk1.7</id>
-      <activation>
-        <jdk>1.7</jdk>
-      </activation>
-      <dependencies>
-        <dependency>
-          <groupId>jdk.tools</groupId>
-          <artifactId>jdk.tools</artifactId>
-          <version>1.7</version>
-          <scope>system</scope>
-          <systemPath>${java.home}/../lib/tools.jar</systemPath>
-        </dependency>
-      </dependencies>
-    </profile>
     <profile>
       <id>jdk1.8</id>
       <activation>

+ 28 - 0
hadoop-common-project/hadoop-common/pom.xml

@@ -838,6 +838,20 @@
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>exec-maven-plugin</artifactId>
             <executions>
+              <execution>
+                <id>convert-ms-winutils</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd</executable>
+                  <arguments>
+                    <argument>${basedir}\src\main\winutils</argument>
+                    <argument>${project.build.directory}</argument>
+                  </arguments>
+                </configuration>
+              </execution>
               <execution>
                 <id>compile-ms-winutils</id>
                 <phase>compile</phase>
@@ -857,6 +871,20 @@
                   </arguments>
                 </configuration>
               </execution>
+              <execution>
+                <id>convert-ms-native-dll</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd</executable>
+                  <arguments>
+                    <argument>${basedir}\src\main\native</argument>
+                    <argument>${project.build.directory}</argument>
+                  </arguments>
+                </configuration>
+              </execution>
               <execution>
                 <id>compile-ms-native-dll</id>
                 <phase>compile</phase>

+ 4 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -816,8 +816,11 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
    */
   @SuppressWarnings("unchecked")
   public Configuration(Configuration other) {
-    this.resources = (ArrayList<Resource>) other.resources.clone();
     synchronized(other) {
+      // Make sure we clone a finalized state
+      // Resources like input streams can be processed only once
+      other.getProps();
+      this.resources = (ArrayList<Resource>) other.resources.clone();
       if (other.properties != null) {
         this.properties = (Properties)other.properties.clone();
       }
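
For context, here is a minimal sketch (not part of the commit; the resource content and key names are illustrative) of the situation the added other.getProps() call guards against: stream-backed resources can be read only once, so the copy constructor now forces the source configuration to parse them before its resource list is cloned.

    // Assumes org.apache.hadoop.conf.Configuration plus java.io.BufferedInputStream
    // and java.io.ByteArrayInputStream are imported.
    Configuration original = new Configuration();
    original.addResource(new BufferedInputStream(
        new ByteArrayInputStream("<configuration></configuration>".getBytes())));

    // Cloning now calls original.getProps() first, so the stream resource is
    // parsed (and consumed) exactly once before the resource list is copied.
    Configuration copy = new Configuration(original);

    copy.get("first.lookup");      // the copy holds already-parsed properties
    original.get("second.lookup"); // the original does not re-read the closed stream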

+ 24 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java

@@ -53,8 +53,7 @@ public class ThreadUtil {
    * Convenience method that returns a resource as inputstream from the
    * classpath.
    * <p>
-   * It first attempts to use the Thread's context classloader and if not
-   * set it uses the class' classloader.
+   * Uses the Thread's context classloader to load resource.
    *
    * @param resourceName resource to retrieve.
    *
@@ -68,6 +67,27 @@ public class ThreadUtil {
       throw new IOException("Can not read resource file '" + resourceName +
       throw new IOException("Can not read resource file '" + resourceName +
           "' because class loader of the current thread is null");
           "' because class loader of the current thread is null");
     }
     }
+    return getResourceAsStream(cl, resourceName);
+  }
+
+  /**
+   * Convenience method that returns a resource as inputstream from the
+   * classpath using given classloader.
+   * <p>
+   *
+   * @param cl ClassLoader to be used to retrieve resource.
+   * @param resourceName resource to retrieve.
+   *
+   * @throws IOException thrown if resource cannot be loaded
+   * @return inputstream with the resource.
+   */
+  public static InputStream getResourceAsStream(ClassLoader cl,
+        String resourceName)
+        throws IOException {
+    if (cl == null) {
+      throw new IOException("Can not read resource file '" + resourceName +
+          "' because given class loader is null");
+    }
     InputStream is = cl.getResourceAsStream(resourceName);
     if (is == null) {
       throw new IOException("Can not read resource file '" +
@@ -75,4 +95,6 @@ public class ThreadUtil {
     }
     return is;
   }
+
+
 }
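
A brief usage sketch of the new ClassLoader-based overload (the calling class and resource name below are hypothetical): it lets a caller pin the lookup to a specific class loader instead of the thread's context class loader, which is how the VersionInfo change that follows uses it.

    // Assumes org.apache.hadoop.util.ThreadUtil, java.io.InputStream and
    // java.util.Properties are imported; MyComponent is a made-up caller.
    Properties info = new Properties();
    try (InputStream in = ThreadUtil.getResourceAsStream(
        MyComponent.class.getClassLoader(), "my-component-info.properties")) {
      info.load(in);
    }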

+ 2 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java

@@ -43,7 +43,8 @@ public class VersionInfo {
     String versionInfoFile = component + "-version-info.properties";
     InputStream is = null;
     try {
-      is = ThreadUtil.getResourceAsStream(versionInfoFile);
+      is = ThreadUtil.getResourceAsStream(VersionInfo.class.getClassLoader(),
+          versionInfoFile);
       info.load(is);
     } catch (IOException ex) {
       LoggerFactory.getLogger(getClass()).warn("Could not read '" +

+ 2 - 0
hadoop-common-project/hadoop-common/src/main/native/native.vcxproj

@@ -71,6 +71,7 @@
   <PropertyGroup>
     <SnappyLib Condition="Exists('$(CustomSnappyPrefix)\snappy.dll')">$(CustomSnappyPrefix)</SnappyLib>
     <SnappyLib Condition="Exists('$(CustomSnappyPrefix)\lib\snappy.dll') And '$(SnappyLib)' == ''">$(CustomSnappyPrefix)\lib</SnappyLib>
+    <SnappyLib Condition="Exists('$(CustomSnappyPrefix)\bin\snappy.dll') And '$(SnappyLib)' == ''">$(CustomSnappyPrefix)\bin</SnappyLib>
     <SnappyLib Condition="Exists('$(CustomSnappyLib)') And '$(SnappyLib)' == ''">$(CustomSnappyLib)</SnappyLib>
     <SnappyInclude Condition="Exists('$(CustomSnappyPrefix)\snappy.h')">$(CustomSnappyPrefix)</SnappyInclude>
     <SnappyInclude Condition="Exists('$(CustomSnappyPrefix)\include\snappy.h') And '$(SnappyInclude)' == ''">$(CustomSnappyPrefix)\include</SnappyInclude>
@@ -82,6 +83,7 @@
   <PropertyGroup>
     <IsalLib Condition="Exists('$(CustomIsalPrefix)\isa-l.dll')">$(CustomIsalPrefix)</IsalLib>
     <IsalLib Condition="Exists('$(CustomIsalPrefix)\lib\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\lib</IsalLib>
+    <IsalLib Condition="Exists('$(CustomIsalPrefix)\bin\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\bin</IsalLib>
     <IsalLib Condition="Exists('$(CustomIsalLib)') And '$(IsalLib)' == ''">$(CustomIsalLib)</IsalLib>
     <IsalEnabled Condition="'$(IsalLib)' != ''">true</IsalEnabled>
   </PropertyGroup>

+ 33 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c

@@ -27,8 +27,12 @@
 #ifdef UNIX
 static EVP_CIPHER_CTX * (*dlsym_EVP_CIPHER_CTX_new)(void);
 static void (*dlsym_EVP_CIPHER_CTX_free)(EVP_CIPHER_CTX *);
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+static int (*dlsym_EVP_CIPHER_CTX_reset)(EVP_CIPHER_CTX *);
+#else
 static int (*dlsym_EVP_CIPHER_CTX_cleanup)(EVP_CIPHER_CTX *);
 static void (*dlsym_EVP_CIPHER_CTX_init)(EVP_CIPHER_CTX *);
+#endif
 static int (*dlsym_EVP_CIPHER_CTX_set_padding)(EVP_CIPHER_CTX *, int);
 static int (*dlsym_EVP_CIPHER_CTX_test_flags)(const EVP_CIPHER_CTX *, int);
 static int (*dlsym_EVP_CIPHER_CTX_block_size)(const EVP_CIPHER_CTX *);
@@ -123,10 +127,16 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_initIDs
                       "EVP_CIPHER_CTX_new");
                       "EVP_CIPHER_CTX_new");
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_free, env, openssl,  \
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_free, env, openssl,  \
                       "EVP_CIPHER_CTX_free");
                       "EVP_CIPHER_CTX_free");
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+  LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_reset, env, openssl,  \
+                      "EVP_CIPHER_CTX_reset");
+#else
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_cleanup, env, openssl,  \
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_cleanup, env, openssl,  \
                       "EVP_CIPHER_CTX_cleanup");
                       "EVP_CIPHER_CTX_cleanup");
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_init, env, openssl,  \
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_init, env, openssl,  \
                       "EVP_CIPHER_CTX_init");
                       "EVP_CIPHER_CTX_init");
+#endif
+
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_set_padding, env, openssl,  \
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_set_padding, env, openssl,  \
                       "EVP_CIPHER_CTX_set_padding");
                       "EVP_CIPHER_CTX_set_padding");
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_test_flags, env, openssl,  \
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_test_flags, env, openssl,  \
@@ -271,7 +281,11 @@ JNIEXPORT jlong JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_init
   (*env)->ReleaseByteArrayElements(env, key, jKey, 0);
   (*env)->ReleaseByteArrayElements(env, iv, jIv, 0);
   if (rc == 0) {
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+    dlsym_EVP_CIPHER_CTX_reset(context);
+#else
     dlsym_EVP_CIPHER_CTX_cleanup(context);
+#endif
     THROW(env, "java/lang/InternalError", "Error in EVP_CipherInit_ex.");
     return (jlong)0;
   }
@@ -334,7 +348,11 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_update
   int output_len = 0;
   if (!dlsym_EVP_CipherUpdate(context, output_bytes, &output_len,  \
       input_bytes, input_len)) {
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+    dlsym_EVP_CIPHER_CTX_reset(context);
+#else
     dlsym_EVP_CIPHER_CTX_cleanup(context);
+#endif
     THROW(env, "java/lang/InternalError", "Error in EVP_CipherUpdate.");
     return 0;
   }
@@ -376,7 +394,11 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_doFinal
 
   int output_len = 0;
   if (!dlsym_EVP_CipherFinal_ex(context, output_bytes, &output_len)) {
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+    dlsym_EVP_CIPHER_CTX_reset(context);
+#else
     dlsym_EVP_CIPHER_CTX_cleanup(context);
+#endif
     THROW(env, "java/lang/InternalError", "Error in EVP_CipherFinal_ex.");
     return 0;
   }
@@ -396,6 +418,16 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_getLibrary
     (JNIEnv *env, jclass clazz)
 {
 #ifdef UNIX
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+  if (dlsym_EVP_CIPHER_CTX_reset) {
+    Dl_info dl_info;
+    if(dladdr(
+        dlsym_EVP_CIPHER_CTX_reset,
+        &dl_info)) {
+      return (*env)->NewStringUTF(env, dl_info.dli_fname);
+    }
+  }
+#else
   if (dlsym_EVP_CIPHER_CTX_init) {
     Dl_info dl_info;
     if(dladdr(
@@ -404,6 +436,7 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_getLibrary
       return (*env)->NewStringUTF(env, dl_info.dli_fname);
     }
   }
+#endif
 
   return (*env)->NewStringUTF(env, HADOOP_OPENSSL_LIBRARY);
 #endif

+ 31 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.conf;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -2419,4 +2420,34 @@ public class TestConfiguration {
       System.setOut(output);
     }
   }
+
+  /**
+   * Test race conditions between clone() and getProps().
+   * Test for race conditions in the way Hadoop handles the Configuration
+   * class. The scenario is the following. Let's assume that there are two
+   * threads sharing the same Configuration class. One adds some resources
+   * to the configuration, while the other one clones it. Resources are
+   * loaded lazily in a deferred call to loadResources(). If the cloning
+   * happens after adding the resources but before parsing them, some temporary
+   * resources like input stream pointers are cloned. Eventually both copies
+   * will load the same input stream resources.
+   * One parses the input stream XML and closes it updating it's own copy of
+   * the resource. The other one has another pointer to the same input stream.
+   * When it tries to load it, it will crash with a stream closed exception.
+   */
+  @Test
+  public void testResourceRace() {
+    InputStream is =
+        new BufferedInputStream(new ByteArrayInputStream(
+            "<configuration></configuration>".getBytes()));
+    Configuration config = new Configuration();
+    // Thread 1
+    config.addResource(is);
+    // Thread 2
+    Configuration confClone = new Configuration(conf);
+    // Thread 2
+    confClone.get("firstParse");
+    // Thread 1
+    config.get("secondParse");
+  }
 }

+ 2 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java

@@ -43,7 +43,7 @@ public final class FileContextTestHelper {
    * Create a context with test root relative to the test directory
    */
   public FileContextTestHelper() {
-    this(GenericTestUtils.getRandomizedTestDir().getAbsolutePath());
+    this(GenericTestUtils.getRandomizedTestDir().getPath());
   }
 
   /**
@@ -83,7 +83,7 @@ public final class FileContextTestHelper {
         absTestRootDir = testRootDir;
       } else {
         absTestRootDir = fc.getWorkingDirectory().toString() + "/"
-            + testRootDir;
+            + new Path(testRootDir).toUri();
       }
     }
     return absTestRootDir;

+ 31 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java

@@ -228,9 +228,9 @@ public class ContractTestUtils extends Assert {
   public static void verifyFileContents(FileSystem fs,
                                         Path path,
                                         byte[] original) throws IOException {
+    assertIsFile(fs, path);
     FileStatus stat = fs.getFileStatus(path);
     String statText = stat.toString();
-    assertTrue("not a file " + statText, stat.isFile());
     assertEquals("wrong length " + statText, original.length, stat.getLen());
     byte[] bytes = readDataset(fs, path, original.length);
     compareByteArrays(original, bytes, original.length);
@@ -853,6 +853,36 @@ public class ContractTestUtils extends Assert {
                        status.isSymlink());
   }
 
+  /**
+   * Assert that a varargs list of paths exist.
+   * @param fs filesystem
+   * @param message message for exceptions
+   * @param paths paths
+   * @throws IOException IO failure
+   */
+  public static void assertPathsExist(FileSystem fs,
+      String message,
+      Path... paths) throws IOException {
+    for (Path path : paths) {
+      assertPathExists(fs, message, path);
+    }
+  }
+
+  /**
+   * Assert that a varargs list of paths do not exist.
+   * @param fs filesystem
+   * @param message message for exceptions
+   * @param paths paths
+   * @throws IOException IO failure
+   */
+  public static void assertPathsDoNotExist(FileSystem fs,
+      String message,
+      Path... paths) throws IOException {
+    for (Path path : paths) {
+      assertPathDoesNotExist(fs, message, path);
+    }
+  }
+
   /**
    * Create a dataset for use in the tests; all data is in the range
    * base to (base+modulo-1) inclusive.
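
A short, hypothetical usage sketch of the two new varargs helpers (the filesystem, message, and paths are made up); each simply loops over the existing single-path assertions.

    // e.g. in a contract test, after renaming two part files:
    ContractTestUtils.assertPathsExist(fs, "rename destinations",
        new Path(destDir, "part-0000"), new Path(destDir, "part-0001"));
    ContractTestUtils.assertPathsDoNotExist(fs, "rename sources",
        new Path(srcDir, "part-0000"), new Path(srcDir, "part-0001"));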

+ 5 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java

@@ -44,6 +44,7 @@ import org.apache.commons.lang.RandomStringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.log4j.Appender;
@@ -248,7 +249,7 @@ public abstract class GenericTestUtils {
    * @return the absolute directory for tests. Caller is expected to create it.
    */
   public static File getRandomizedTestDir() {
-    return new File(getRandomizedTempPath()).getAbsoluteFile();
+    return new File(getRandomizedTempPath());
   }
 
   /**
@@ -259,7 +260,9 @@ public abstract class GenericTestUtils {
    * @return a string to use in paths
    */
   public static String getTempPath(String subpath) {
-    String prop = System.getProperty(SYSPROP_TEST_DATA_DIR, DEFAULT_TEST_DATA_PATH);
+    String prop = (Path.WINDOWS) ? DEFAULT_TEST_DATA_PATH
+        : System.getProperty(SYSPROP_TEST_DATA_DIR, DEFAULT_TEST_DATA_PATH);
+
     if (prop.isEmpty()) {
       // corner case: property is there but empty
       prop = DEFAULT_TEST_DATA_PATH;

+ 6 - 0
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java

@@ -28,6 +28,7 @@ import javax.servlet.ServletContextListener;
 import com.codahale.metrics.JmxReporter;
 import com.codahale.metrics.Meter;
 import com.codahale.metrics.MetricRegistry;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.key.CachingKeyProvider;
@@ -159,6 +160,11 @@ public class KMSWebApp implements ServletContextListener {
       }
       KeyProvider keyProvider =
           KeyProviderFactory.get(new URI(providerString), kmsConf);
+      Preconditions.checkNotNull(keyProvider, String.format("No" +
+              " KeyProvider has been initialized, please" +
+              " check whether %s '%s' is configured correctly in" +
+              " kms-site.xml.", KMSConfiguration.KEY_PROVIDER_URI,
+          providerString));
       if (kmsConf.getBoolean(KMSConfiguration.KEY_CACHE_ENABLE,
           KMSConfiguration.KEY_CACHE_ENABLE_DEFAULT)) {
         long keyTimeOutMillis =

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -2910,7 +2910,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
    * @param num Number of threads for hedged reads thread pool.
    * If zero, skip hedged reads thread pool creation.
    */
-  private synchronized void initThreadsNumForHedgedReads(int num) {
+  private static synchronized void initThreadsNumForHedgedReads(int num) {
     if (num <= 0 || HEDGED_READ_THREAD_POOL != null) return;
     HEDGED_READ_THREAD_POOL = new ThreadPoolExecutor(1, num, 60,
         TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
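
The added "static" matters because HEDGED_READ_THREAD_POOL is a static field: an instance-synchronized method only locks one DFSClient, so two clients initializing hedged reads concurrently could both observe the field as null and build two pools. A rough sketch of the pattern the fix enforces (the field name and pool sizing below are illustrative, not the real DFSClient code):

    // Assumes java.util.concurrent.{ThreadPoolExecutor, TimeUnit, SynchronousQueue}.
    // A static field needs a class-level lock, so the null check and the
    // assignment are atomic with respect to every instance.
    private static ThreadPoolExecutor hedgedReadPool;

    private static synchronized void initPool(int num) {
      if (num <= 0 || hedgedReadPool != null) {
        return;
      }
      hedgedReadPool = new ThreadPoolExecutor(1, num, 60, TimeUnit.SECONDS,
          new SynchronousQueue<Runnable>());
    }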

+ 98 - 18
hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml

@@ -31,6 +31,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
 
   <properties>
     <require.fuse>false</require.fuse>
+    <require.libwebhdfs>false</require.libwebhdfs>
+    <require.valgrind>false</require.valgrind>
+    <native_ctest_args></native_ctest_args>
+    <native_cmake_args></native_cmake_args>
+    <native_make_args></native_make_args>
     <hadoop.component>hdfs</hadoop.component>
   </properties>
 
@@ -85,6 +90,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
             <exclude>src/main/native/config/*</exclude>
             <exclude>src/main/native/m4/*</exclude>
             <exclude>src/main/native/fuse-dfs/util/tree.h</exclude>
+            <exclude>src/main/native/libhdfspp/third_party/**</exclude>
             <exclude>src/contrib/**</exclude>
           </excludes>
         </configuration>
@@ -138,17 +144,16 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                 </goals>
                 <configuration>
                   <target>
-                    <condition property="generator" value="Visual Studio 10" else="Visual Studio 10 Win64">
-                      <equals arg1="Win32" arg2="${env.PLATFORM}" />
-                    </condition>
                     <mkdir dir="${project.build.directory}/native"/>
                     <exec executable="cmake" dir="${project.build.directory}/native"
                           failonerror="true">
-                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_FUSE=${require.fuse} -G '${generator}'"/>
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DHADOOP_BUILD=1 -DREQUIRE_FUSE=${require.fuse} -DREQUIRE_VALGRIND=${require.valgrind} -A '${env.PLATFORM}'"/>
+                      <arg line="${native_cmake_args}"/>
                     </exec>
                     <exec executable="msbuild" dir="${project.build.directory}/native"
                           failonerror="true">
                       <arg line="ALL_BUILD.vcxproj /nologo /p:Configuration=RelWithDebInfo /p:LinkIncremental=false"/>
+                      <arg line="${native_make_args}"/>
                     </exec>
                     <!-- Copy for inclusion in distribution. -->
                     <copy todir="${project.build.directory}/bin">
@@ -167,11 +172,15 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                     <property name="compile_classpath" refid="maven.compile.classpath"/>
                     <property name="compile_classpath" refid="maven.compile.classpath"/>
                     <property name="test_classpath" refid="maven.test.classpath"/>
                     <property name="test_classpath" refid="maven.test.classpath"/>
                     <exec executable="ctest" failonerror="true" dir="${project.build.directory}/native">
                     <exec executable="ctest" failonerror="true" dir="${project.build.directory}/native">
+                      <arg line="--output-on-failure"/>
+                      <arg line="${native_ctest_args}"/>
                       <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
                       <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
                       <!-- HADOOP_HOME required to find winutils. -->
                       <!-- HADOOP_HOME required to find winutils. -->
                       <env key="HADOOP_HOME" value="${hadoop.common.build.dir}"/>
                       <env key="HADOOP_HOME" value="${hadoop.common.build.dir}"/>
                       <!-- Make sure hadoop.dll and jvm.dll are on PATH. -->
                       <!-- Make sure hadoop.dll and jvm.dll are on PATH. -->
                       <env key="PATH" value="${env.PATH};${hadoop.common.build.dir}/bin;${java.home}/jre/bin/server;${java.home}/bin/server"/>
                       <env key="PATH" value="${env.PATH};${hadoop.common.build.dir}/bin;${java.home}/jre/bin/server;${java.home}/bin/server"/>
+                      <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
+                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
                     </exec>
                     </exec>
                   </target>
                   </target>
                 </configuration>
                 </configuration>
@@ -192,31 +201,90 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
       <build>
         <plugins>
           <plugin>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-maven-plugins</artifactId>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-antrun-plugin</artifactId>
             <executions>
               <execution>
-                <id>cmake-compile</id>
+                <id>make</id>
                 <phase>compile</phase>
-                <goals><goal>cmake-compile</goal></goals>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <mkdir dir="${project.build.directory}"/>
+                    <exec executable="cmake" dir="${project.build.directory}" failonerror="true">
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model}  -DHADOOP_BUILD=1 -DREQUIRE_LIBWEBHDFS=${require.libwebhdfs} -DREQUIRE_FUSE=${require.fuse} -DREQUIRE_VALGRIND=${require.valgrind} "/>
+                      <arg line="${native_cmake_args}"/>
+                    </exec>
+                    <exec executable="make" dir="${project.build.directory}" failonerror="true">
+                      <arg line="${native_make_args}"/>
+                    </exec>
+                  </target>
+                </configuration>
+              </execution>
+              <execution>
+                <id>native_tests</id>
+                <phase>test</phase>
+                <goals><goal>run</goal></goals>
                 <configuration>
-                  <source>${basedir}/src</source>
-                  <vars>
-                    <GENERATED_JAVAH>${project.build.directory}/native/javah</GENERATED_JAVAH>
-                    <JVM_ARCH_DATA_MODEL>${sun.arch.data.model}</JVM_ARCH_DATA_MODEL>
-                    <REQUIRE_FUSE>${require.fuse}</REQUIRE_FUSE>
-                  </vars>
-                  <output>${project.build.directory}</output>
+                  <skip>${skipTests}</skip>
+                  <target>
+                    <property name="compile_classpath" refid="maven.compile.classpath"/>
+                    <property name="test_classpath" refid="maven.test.classpath"/>
+                    <exec executable="ctest" failonerror="true" dir="${project.build.directory}/">
+                      <arg line="--output-on-failure"/>
+                      <arg line="${native_ctest_args}"/>
+                      <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
+                      <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
+                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
+                    </exec>
+                  </target>
                 </configuration>
               </execution>
             </executions>
           </plugin>
+        </plugins>
+      </build>
+    </profile>
+    <profile>
+      <id>test-patch</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <properties>
+        <runningWithNative>true</runningWithNative>
+      </properties>
+      <build>
+        <plugins>
           <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-antrun-plugin</artifactId>
             <executions>
               <execution>
-                <id>native_tests</id>
+                <id>make_altern</id>
+                <phase>compile</phase>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <mkdir dir="${project.build.directory}/altern"/>
+                    <condition property="c_compiler" value="clang" else="gcc">
+                      <contains string="${env.CC}" substring="gcc"/>
+                    </condition>
+                    <condition property="cxx_compiler" value="clang++" else="g++">
+                      <contains string="${env.CXX}" substring="g++"/>
+                    </condition>
+                    <exec executable="cmake" dir="${project.build.directory}/altern" failonerror="true">
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/altern/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model}  -DHADOOP_BUILD=1 -DREQUIRE_LIBWEBHDFS=${require.libwebhdfs} -DREQUIRE_FUSE=${require.fuse} -DREQUIRE_VALGRIND=${require.valgrind} "/>
+                      <arg line="-DCMAKE_C_COMPILER=${c_compiler} -DCMAKE_CXX_COMPILER=${cxx_compiler}"/>
+                      <arg line="${native_cmake_args}"/>
+                    </exec>
+                    <exec executable="make" dir="${project.build.directory}/altern" failonerror="true">
+                      <arg line="${native_make_args}"/>
+                    </exec>
+                  </target>
+                </configuration>
+              </execution>
+              <execution>
+                <id>native_tests_altern</id>
                <phase>test</phase>
                <goals><goal>run</goal></goals>
                <configuration>
@@ -224,14 +292,26 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                  <target>
                    <property name="compile_classpath" refid="maven.compile.classpath"/>
                    <property name="test_classpath" refid="maven.test.classpath"/>
-                    <exec executable="ctest" failonerror="true" dir="${project.build.directory}/">
+                    <exec executable="ctest" failonerror="true" dir="${project.build.directory}/altern">
+                      <arg line="--output-on-failure"/>
+                      <arg line="${native_ctest_args}"/>
                       <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
                       <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
                       <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
                       <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
-                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
+                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/altern/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
                    </exec>
                  </target>
                </configuration>
              </execution>
+              <execution>
+                <id>clean_altern</id>
+                <phase>test</phase>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <delete dir="${project.build.directory}/altern" includeemptydirs="true"/>
+                  </target>
+                </configuration>
+              </execution>
            </executions>
          </plugin>
        </plugins>

+ 7 - 10
hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt

@@ -58,19 +58,11 @@ if(WIN32)
    # Omit unneeded headers.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWIN32_LEAN_AND_MEAN")
    set(OS_DIR ${CMAKE_SOURCE_DIR}/main/native/libhdfs/os/windows)
-
-    # IMPORTANT: OUT_DIR MUST be relative to maven's
-    # project.build.directory (=target) and match dist-copynativelibs
-    # in order to be in a release
-    set(OUT_DIR bin)
+    set(OUT_DIR target/bin)
else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
    set(OS_DIR ${CMAKE_SOURCE_DIR}/main/native/libhdfs/os/posix)
-
-    # IMPORTANT: OUT_DIR MUST be relative to maven's
-    # project.build.directory (=target) and match dist-copynativelibs
-    # in order to be in a release
-    set(OUT_DIR native/target/usr/local/lib)
+    set(OUT_DIR target/usr/local/lib)
endif()

# Configure JNI.
@@ -98,6 +90,11 @@ endfunction()

add_subdirectory(main/native/libhdfs)
add_subdirectory(main/native/libhdfs-tests)
+add_subdirectory(main/native/libhdfspp)
+
+if(REQUIRE_LIBWEBHDFS)
+    add_subdirectory(contrib/libwebhdfs)
+endif()

# Find Linux FUSE
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")

+ 16 - 6
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.c

@@ -182,6 +182,16 @@ struct NativeMiniDfsCluster* nmdCreate(struct NativeMiniDfsConf *conf)
        }
        (*env)->DeleteLocalRef(env, val.l);
    }
+    if (conf->numDataNodes) {
+        jthr = invokeMethod(env, &val, INSTANCE, bld, MINIDFS_CLUSTER_BUILDER,
+                "numDataNodes", "(I)L" MINIDFS_CLUSTER_BUILDER ";", conf->numDataNodes);
+        if (jthr) {
+            printExceptionAndFree(env, jthr, PRINT_EXC_ALL, "nmdCreate: "
+                                  "Builder::numDataNodes");
+            goto error;
+        }
+    }
+    (*env)->DeleteLocalRef(env, val.l);
    jthr = invokeMethod(env, &val, INSTANCE, bld, MINIDFS_CLUSTER_BUILDER,
            "build", "()L" MINIDFS_CLUSTER ";");
    if (jthr) {
@@ -291,7 +301,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
    jthrowable jthr;
    int ret = 0;
    const char *host;
-    
+
    if (!env) {
        fprintf(stderr, "nmdHdfsConnect: getJNIEnv failed\n");
        return -EIO;
@@ -306,7 +316,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
        return -EIO;
    }
    jNameNode = jVal.l;
-    
+
    // Then get the http address (InetSocketAddress) of the NameNode
    jthr = invokeMethod(env, &jVal, INSTANCE, jNameNode, HADOOP_NAMENODE,
                        "getHttpAddress", "()L" JAVA_INETSOCKETADDRESS ";");
@@ -317,7 +327,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
        goto error_dlr_nn;
    }
    jAddress = jVal.l;
-    
+
    jthr = invokeMethod(env, &jVal, INSTANCE, jAddress,
                        JAVA_INETSOCKETADDRESS, "getPort", "()I");
    if (jthr) {
@@ -327,7 +337,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
        goto error_dlr_addr;
    }
    *port = jVal.i;
-    
+
    jthr = invokeMethod(env, &jVal, INSTANCE, jAddress, JAVA_INETSOCKETADDRESS,
                        "getHostName", "()Ljava/lang/String;");
    if (jthr) {
@@ -339,12 +349,12 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
    host = (*env)->GetStringUTFChars(env, jVal.l, NULL);
    *hostName = strdup(host);
    (*env)->ReleaseStringUTFChars(env, jVal.l, host);
-    
+
error_dlr_addr:
    (*env)->DeleteLocalRef(env, jAddress);
error_dlr_nn:
    (*env)->DeleteLocalRef(env, jNameNode);
-    
+
    return ret;
}


+ 8 - 3
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.h

@@ -26,7 +26,7 @@ extern  "C" {
#endif

struct hdfsBuilder;
-struct NativeMiniDfsCluster; 
+struct NativeMiniDfsCluster;

/**
 * Represents a configuration to use for creating a Native MiniDFSCluster
@@ -51,6 +51,11 @@ struct NativeMiniDfsConf {
     * Nonzero if we should configure short circuit.
     */
    jboolean configureShortCircuit;
+
+    /**
+     * The number of datanodes in MiniDfsCluster
+     */
+    jint numDataNodes;
};

/**
@@ -96,13 +101,13 @@ void nmdFree(struct NativeMiniDfsCluster* cl);
 *
 * @return          the port, or a negative error code
 */
-int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl); 
+int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl);

/**
 * Get the http address that's in use by the given (non-HA) nativeMiniDfs
 *
 * @param cl        The initialized NativeMiniDfsCluster
- * @param port      Used to capture the http port of the NameNode 
+ * @param port      Used to capture the http port of the NameNode
 *                  of the NativeMiniDfsCluster
 * @param hostName  Used to capture the http hostname of the NameNode
 *                  of the NativeMiniDfsCluster

+ 350 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c

@@ -0,0 +1,350 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "expect.h"
+#include "hdfs/hdfs.h"
+#include "hdfspp/hdfs_ext.h"
+#include "native_mini_dfs.h"
+#include "os/thread.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define TO_STR_HELPER(X) #X
+#define TO_STR(X) TO_STR_HELPER(X)
+
+#define TLH_MAX_THREADS 10000
+
+#define TLH_MAX_DNS 16
+
+#define TLH_DEFAULT_BLOCK_SIZE 1048576
+
+#define TLH_DEFAULT_DFS_REPLICATION 3
+
+#define TLH_DEFAULT_IPC_CLIENT_CONNECT_MAX_RETRIES 100
+
+#define TLH_DEFAULT_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS 5
+
+#ifndef RANDOM_ERROR_RATIO
+#define RANDOM_ERROR_RATIO 1000000000
+#endif
+
+struct tlhThreadInfo {
+  /** Thread index */
+  int threadIdx;
+  /** 0 = thread was successful; error code otherwise */
+  int success;
+  /** thread identifier */
+  thread theThread;
+  /** fs, shared with other threads **/
+  hdfsFS hdfs;
+  /** Filename */
+  const char *fileNm;
+
+};
+
+static int hdfsNameNodeConnect(struct NativeMiniDfsCluster *cl, hdfsFS *fs,
+                               const char *username)
+{
+  int ret;
+  tPort port;
+  hdfsFS hdfs;
+  struct hdfsBuilder *bld;
+
+  port = (tPort)nmdGetNameNodePort(cl);
+  if (port < 0) {
+    fprintf(stderr, "hdfsNameNodeConnect: nmdGetNameNodePort "
+            "returned error %d\n", port);
+    return port;
+  }
+  bld = hdfsNewBuilder();
+  if (!bld)
+    return -ENOMEM;
+  hdfsBuilderSetForceNewInstance(bld);
+  hdfsBuilderSetNameNode(bld, "localhost");
+  hdfsBuilderSetNameNodePort(bld, port);
+  hdfsBuilderConfSetStr(bld, "dfs.block.size",
+                        TO_STR(TLH_DEFAULT_BLOCK_SIZE));
+  hdfsBuilderConfSetStr(bld, "dfs.blocksize",
+                        TO_STR(TLH_DEFAULT_BLOCK_SIZE));
+  hdfsBuilderConfSetStr(bld, "dfs.replication",
+                        TO_STR(TLH_DEFAULT_DFS_REPLICATION));
+  hdfsBuilderConfSetStr(bld, "ipc.client.connect.max.retries",
+                        TO_STR(TLH_DEFAULT_IPC_CLIENT_CONNECT_MAX_RETRIES));
+  hdfsBuilderConfSetStr(bld, "ipc.client.connect.retry.interval",
+                        TO_STR(TLH_DEFAULT_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS));
+  if (username) {
+    hdfsBuilderSetUserName(bld, username);
+  }
+  hdfs = hdfsBuilderConnect(bld);
+  if (!hdfs) {
+    ret = -errno;
+    return ret;
+  }
+  *fs = hdfs;
+  return 0;
+}
+
+static int hdfsWriteData(hdfsFS hdfs, const char *dirNm,
+                         const char *fileNm, tSize fileSz)
+{
+  hdfsFile file;
+  int ret, expected;
+  const char *content;
+
+  content = fileNm;
+
+  if (hdfsExists(hdfs, dirNm) == 0) {
+    EXPECT_ZERO(hdfsDelete(hdfs, dirNm, 1));
+  }
+  EXPECT_ZERO(hdfsCreateDirectory(hdfs, dirNm));
+
+  file = hdfsOpenFile(hdfs, fileNm, O_WRONLY, 0, 0, 0);
+  EXPECT_NONNULL(file);
+
+  expected = (int)strlen(content);
+  tSize sz = 0;
+  while (sz < fileSz) {
+    ret = hdfsWrite(hdfs, file, content, expected);
+    if (ret < 0) {
+      ret = errno;
+      fprintf(stderr, "hdfsWrite failed and set errno %d\n", ret);
+      return ret;
+    }
+    if (ret != expected) {
+      fprintf(stderr, "hdfsWrite was supposed to write %d bytes, but "
+              "it wrote %d\n", ret, expected);
+      return EIO;
+    }
+    sz += ret;
+  }
+  EXPECT_ZERO(hdfsFlush(hdfs, file));
+  EXPECT_ZERO(hdfsHSync(hdfs, file));
+  EXPECT_ZERO(hdfsCloseFile(hdfs, file));
+  return 0;
+}
+
+static int fileEventCallback1(const char * event, const char * cluster, const char * file, int64_t value, int64_t cookie)
+{
+  char * randomErrRatioStr = getenv("RANDOM_ERROR_RATIO");
+  int64_t randomErrRatio = RANDOM_ERROR_RATIO;
+  if (randomErrRatioStr) randomErrRatio = (int64_t)atoi(randomErrRatioStr);
+  if (randomErrRatio == 0) return DEBUG_SIMULATE_ERROR;
+  else if (randomErrRatio < 0) return LIBHDFSPP_EVENT_OK;
+  return random() % randomErrRatio == 0 ? DEBUG_SIMULATE_ERROR : LIBHDFSPP_EVENT_OK;
+}
+
+static int fileEventCallback2(const char * event, const char * cluster, const char * file, int64_t value, int64_t cookie)
+{
+  /* no op */
+  return LIBHDFSPP_EVENT_OK;
+}
+
+static int doTestHdfsMiniStress(struct tlhThreadInfo *ti, int randomErr)
+{
+  char tmp[4096];
+  hdfsFile file;
+  int ret, expected;
+  hdfsFileInfo *fileInfo;
+  uint64_t readOps, nErrs=0;
+  tOffset seekPos;
+  const char *content;
+
+  content = ti->fileNm;
+  expected = (int)strlen(content);
+
+  fileInfo = hdfsGetPathInfo(ti->hdfs, ti->fileNm);
+  EXPECT_NONNULL(fileInfo);
+
+  file = hdfsOpenFile(ti->hdfs, ti->fileNm, O_RDONLY, 0, 0, 0);
+  EXPECT_NONNULL(file);
+
+  libhdfspp_file_event_callback callback = (randomErr != 0) ? &fileEventCallback1 : &fileEventCallback2;
+
+  hdfsPreAttachFileMonitor(callback, 0);
+
+  fprintf(stderr, "testHdfsMiniStress(threadIdx=%d): starting read loop\n",
+          ti->threadIdx);
+  for (readOps=0; readOps < 1000; ++readOps) {
+    EXPECT_ZERO(hdfsCloseFile(ti->hdfs, file));
+    file = hdfsOpenFile(ti->hdfs, ti->fileNm, O_RDONLY, 0, 0, 0);
+    EXPECT_NONNULL(file);
+    seekPos = (((double)random()) / RAND_MAX) * (fileInfo->mSize - expected);
+    seekPos = (seekPos / expected) * expected;
+    ret = hdfsSeek(ti->hdfs, file, seekPos);
+    if (ret < 0) {
+      ret = errno;
+      fprintf(stderr, "hdfsSeek to %"PRIu64" failed and set"
+              " errno %d\n", seekPos, ret);
+      ++nErrs;
+      continue;
+    }
+    ret = hdfsRead(ti->hdfs, file, tmp, expected);
+    if (ret < 0) {
+      ret = errno;
+      fprintf(stderr, "hdfsRead failed and set errno %d\n", ret);
+      ++nErrs;
+      continue;
+    }
+    if (ret != expected) {
+      fprintf(stderr, "hdfsRead was supposed to read %d bytes, but "
+              "it read %d\n", ret, expected);
+      ++nErrs;
+      continue;
+    }
+    ret = memcmp(content, tmp, expected);
+    if (ret) {
+      fprintf(stderr, "hdfsRead result (%.*s) does not match expected (%.*s)",
+              expected, tmp, expected, content);
+      ++nErrs;
+      continue;
+    }
+  }
+  EXPECT_ZERO(hdfsCloseFile(ti->hdfs, file));
+  fprintf(stderr, "testHdfsMiniStress(threadIdx=%d): finished read loop\n",
+          ti->threadIdx);
+  EXPECT_ZERO(nErrs);
+  return 0;
+}
+
+static int testHdfsMiniStressImpl(struct tlhThreadInfo *ti)
+{
+  fprintf(stderr, "testHdfsMiniStress(threadIdx=%d): starting\n",
+          ti->threadIdx);
+  EXPECT_NONNULL(ti->hdfs);
+  // Error injection on, some failures are expected in the read path.
+  // The expectation is that any memory stomps will cascade and cause
+  // the following test to fail.  Ideally RPC errors would be separated
+  // from BlockReader errors (RPC is expected to recover from disconnects).
+  doTestHdfsMiniStress(ti, 1);
+  // No error injection
+  EXPECT_ZERO(doTestHdfsMiniStress(ti, 0));
+  return 0;
+}
+
+static void testHdfsMiniStress(void *v)
+{
+  struct tlhThreadInfo *ti = (struct tlhThreadInfo*)v;
+  int ret = testHdfsMiniStressImpl(ti);
+  ti->success = ret;
+}
+
+static int checkFailures(struct tlhThreadInfo *ti, int tlhNumThreads)
+{
+  int i, threadsFailed = 0;
+  const char *sep = "";
+
+  for (i = 0; i < tlhNumThreads; i++) {
+    if (ti[i].success != 0) {
+      threadsFailed = 1;
+    }
+  }
+  if (!threadsFailed) {
+    fprintf(stderr, "testLibHdfsMiniStress: all threads succeeded.  SUCCESS.\n");
+    return EXIT_SUCCESS;
+  }
+  fprintf(stderr, "testLibHdfsMiniStress: some threads failed: [");
+  for (i = 0; i < tlhNumThreads; i++) {
+    if (ti[i].success != 0) {
+      fprintf(stderr, "%s%d", sep, i);
+      sep = ", ";
+    }
+  }
+  fprintf(stderr, "].  FAILURE.\n");
+  return EXIT_FAILURE;
+}
+
+/**
+ * Test intended to stress libhdfs client with concurrent requests. Currently focused
+ * on concurrent reads.
+ */
+int main(void)
+{
+  int i, tlhNumThreads;
+  char *dirNm, *fileNm;
+  tSize fileSz;
+  const char *tlhNumThreadsStr, *tlhNumDNsStr;
+  hdfsFS hdfs = NULL;
+  struct NativeMiniDfsCluster* tlhCluster;
+  struct tlhThreadInfo ti[TLH_MAX_THREADS];
+  struct NativeMiniDfsConf conf = {
+      1, /* doFormat */
+  };
+
+  dirNm = "/tlhMiniStressData";
+  fileNm = "/tlhMiniStressData/file";
+  fileSz = 2*1024*1024;
+
+  tlhNumDNsStr = getenv("TLH_NUM_DNS");
+  if (!tlhNumDNsStr) {
+    tlhNumDNsStr = "1";
+  }
+  conf.numDataNodes = atoi(tlhNumDNsStr);
+  if ((conf.numDataNodes <= 0) || (conf.numDataNodes > TLH_MAX_DNS)) {
+    fprintf(stderr, "testLibHdfsMiniStress: must have a number of datanodes "
+            "between 1 and %d inclusive, not %d\n",
+            TLH_MAX_DNS, conf.numDataNodes);
+    return EXIT_FAILURE;
+  }
+
+  tlhNumThreadsStr = getenv("TLH_NUM_THREADS");
+  if (!tlhNumThreadsStr) {
+    tlhNumThreadsStr = "8";
+  }
+  tlhNumThreads = atoi(tlhNumThreadsStr);
+  if ((tlhNumThreads <= 0) || (tlhNumThreads > TLH_MAX_THREADS)) {
+    fprintf(stderr, "testLibHdfsMiniStress: must have a number of threads "
+            "between 1 and %d inclusive, not %d\n",
+            TLH_MAX_THREADS, tlhNumThreads);
+    return EXIT_FAILURE;
+  }
+  memset(&ti[0], 0, sizeof(ti));
+  for (i = 0; i < tlhNumThreads; i++) {
+    ti[i].threadIdx = i;
+  }
+
+  tlhCluster = nmdCreate(&conf);
+  EXPECT_NONNULL(tlhCluster);
+  EXPECT_ZERO(nmdWaitClusterUp(tlhCluster));
+
+  EXPECT_ZERO(hdfsNameNodeConnect(tlhCluster, &hdfs, NULL));
+
+  // Single threaded writes for now.
+  EXPECT_ZERO(hdfsWriteData(hdfs, dirNm, fileNm, fileSz));
+
+  // Multi-threaded reads.
+  for (i = 0; i < tlhNumThreads; i++) {
+    ti[i].theThread.start = testHdfsMiniStress;
+    ti[i].theThread.arg = &ti[i];
+    ti[i].hdfs = hdfs;
+    ti[i].fileNm = fileNm;
+    EXPECT_ZERO(threadCreate(&ti[i].theThread));
+  }
+  for (i = 0; i < tlhNumThreads; i++) {
+    EXPECT_ZERO(threadJoin(&ti[i].theThread));
+  }
+
+  EXPECT_ZERO(hdfsDisconnect(hdfs));
+  EXPECT_ZERO(nmdShutdown(tlhCluster));
+  nmdFree(tlhCluster);
+  return checkFailures(ti, tlhNumThreads);
+}
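
The stress test above takes its runtime knobs from environment variables read with getenv in main(): TLH_NUM_DNS (datanodes, default 1), TLH_NUM_THREADS (reader threads, default 8), and RANDOM_ERROR_RATIO (error-injection rate for the file event callback). A minimal invocation sketch follows; the binary name and working directory are assumptions for illustration, not part of this change.

``` shell
# Hypothetical binary name/location; only the environment variable names come
# from the test source above.  This runs 4 datanodes and 32 reader threads,
# injecting a simulated error roughly once per 1000 callback invocations.
TLH_NUM_DNS=4 TLH_NUM_THREADS=32 RANDOM_ERROR_RATIO=1000 ./test_libhdfs_mini_stress
```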

+ 59 - 11
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_threaded.c

@@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <limits.h>

#define TO_STR_HELPER(X) #X
#define TO_STR(X) TO_STR_HELPER(X)
@@ -56,7 +57,7 @@ static int hdfsSingleNameNodeConnect(struct NativeMiniDfsCluster *cl, hdfsFS *fs
    tPort port;
    hdfsFS hdfs;
    struct hdfsBuilder *bld;
-    
+
    port = (tPort)nmdGetNameNodePort(cl);
    if (port < 0) {
        fprintf(stderr, "hdfsSingleNameNodeConnect: nmdGetNameNodePort "
@@ -92,13 +93,12 @@ static int doTestGetDefaultBlockSize(hdfsFS fs, const char *path)

    blockSize = hdfsGetDefaultBlockSize(fs);
    if (blockSize < 0) {
-        ret = errno;
-        fprintf(stderr, "hdfsGetDefaultBlockSize failed with error %d\n", ret);
-        return ret;
+        fprintf(stderr, "hdfsGetDefaultBlockSize failed with error %d\n", errno);
+        return -1;
    } else if (blockSize != TLH_DEFAULT_BLOCK_SIZE) {
        fprintf(stderr, "hdfsGetDefaultBlockSize got %"PRId64", but we "
                "expected %d\n", blockSize, TLH_DEFAULT_BLOCK_SIZE);
-        return EIO;
+        return -1;
    }

    blockSize = hdfsGetDefaultBlockSizeAtPath(fs, path);
@@ -109,7 +109,7 @@ static int doTestGetDefaultBlockSize(hdfsFS fs, const char *path)
        return ret;
    } else if (blockSize != TLH_DEFAULT_BLOCK_SIZE) {
        fprintf(stderr, "hdfsGetDefaultBlockSizeAtPath(%s) got "
-                "%"PRId64", but we expected %d\n", 
+                "%"PRId64", but we expected %d\n",
                path, blockSize, TLH_DEFAULT_BLOCK_SIZE);
        return EIO;
    }
@@ -157,12 +157,19 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,

    EXPECT_ZERO(doTestGetDefaultBlockSize(fs, paths->prefix));

+    /* There is no such directory.
+     * Check that errno is set to ENOENT
+     */
+    char invalid_path[] = "/some_invalid/path";
+    EXPECT_NULL_WITH_ERRNO(hdfsListDirectory(fs, invalid_path, &numEntries), ENOENT);
+
    /* There should be no entry in the directory. */
    errno = EACCES; // see if errno is set to 0 on success
    EXPECT_NULL_WITH_ERRNO(hdfsListDirectory(fs, paths->prefix, &numEntries), 0);
    if (numEntries != 0) {
        fprintf(stderr, "hdfsListDirectory set numEntries to "
                "%d on empty directory.", numEntries);
+        return EIO;
    }

    /* There should not be any file to open for reading. */
@@ -190,19 +197,45 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
    }
    if (ret != expected) {
        fprintf(stderr, "hdfsWrite was supposed to write %d bytes, but "
-                "it wrote %d\n", ret, expected);
+                "it wrote %d\n", expected, ret);
        return EIO;
    }
    EXPECT_ZERO(hdfsFlush(fs, file));
    EXPECT_ZERO(hdfsHSync(fs, file));
    EXPECT_ZERO(hdfsCloseFile(fs, file));

+    EXPECT_ZERO(doTestGetDefaultBlockSize(fs, paths->file1));
+
    /* There should be 1 entry in the directory. */
-    EXPECT_NONNULL(hdfsListDirectory(fs, paths->prefix, &numEntries));
+    hdfsFileInfo * dirList = hdfsListDirectory(fs, paths->prefix, &numEntries);
+    EXPECT_NONNULL(dirList);
    if (numEntries != 1) {
        fprintf(stderr, "hdfsListDirectory set numEntries to "
                "%d on directory containing 1 file.", numEntries);
    }
+    hdfsFreeFileInfo(dirList, numEntries);
+
+    /* Create many files for ListDirectory to page through */
+    char listDirTest[PATH_MAX];
+    strcpy(listDirTest, paths->prefix);
+    strcat(listDirTest, "/for_list_test/");
+    EXPECT_ZERO(hdfsCreateDirectory(fs, listDirTest));
+    int nFile;
+    for (nFile = 0; nFile < 10000; nFile++) {
+      char filename[PATH_MAX];
+      snprintf(filename, PATH_MAX, "%s/many_files_%d", listDirTest, nFile);
+      file = hdfsOpenFile(fs, filename, O_WRONLY, 0, 0, 0);
+      EXPECT_NONNULL(file);
+      EXPECT_ZERO(hdfsCloseFile(fs, file));
+    }
+    dirList = hdfsListDirectory(fs, listDirTest, &numEntries);
+    EXPECT_NONNULL(dirList);
+    hdfsFreeFileInfo(dirList, numEntries);
+    if (numEntries != 10000) {
+        fprintf(stderr, "hdfsListDirectory set numEntries to "
+                "%d on directory containing 10000 files.", numEntries);
+        return EIO;
+    }

    /* Let's re-open the file for reading */
    file = hdfsOpenFile(fs, paths->file1, O_RDONLY, 0, 0, 0);
@@ -246,8 +279,8 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
    EXPECT_ZERO(memcmp(paths->prefix, tmp, expected));
    EXPECT_ZERO(hdfsCloseFile(fs, file));

-    // TODO: Non-recursive delete should fail?
-    //EXPECT_NONZERO(hdfsDelete(fs, prefix, 0));
+    //Non-recursive delete fails
+    EXPECT_NONZERO(hdfsDelete(fs, paths->prefix, 0));
    EXPECT_ZERO(hdfsCopy(fs, paths->file1, fs, paths->file2));

    EXPECT_ZERO(hdfsChown(fs, paths->file2, NULL, NULL));
@@ -274,6 +307,17 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,

    snprintf(tmp, sizeof(tmp), "%s/nonexistent-file-name", paths->prefix);
    EXPECT_NEGATIVE_ONE_WITH_ERRNO(hdfsChown(fs, tmp, "ha3", NULL), ENOENT);
+
+    //Test case: File does not exist
+    EXPECT_NULL_WITH_ERRNO(hdfsGetPathInfo(fs, invalid_path), ENOENT);
+
+    //Test case: No permission to access parent directory
+    EXPECT_ZERO(hdfsChmod(fs, paths->prefix, 0));
+    //reconnect as user "SomeGuy" and verify that we get permission errors
+    hdfsFS fs2 = NULL;
+    EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs2, "SomeGuy"));
+    EXPECT_NULL_WITH_ERRNO(hdfsGetPathInfo(fs2, paths->file2), EACCES);
+    EXPECT_ZERO(hdfsDisconnect(fs2));
    return 0;
}

@@ -285,6 +329,8 @@ static int testHdfsOperationsImpl(struct tlhThreadInfo *ti)
     fprintf(stderr, "testHdfsOperations(threadIdx=%d): starting\n",
     fprintf(stderr, "testHdfsOperations(threadIdx=%d): starting\n",
         ti->threadIdx);
         ti->threadIdx);
     EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs, NULL));
     EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs, NULL));
+    if (!fs)
+        return 1;
    EXPECT_ZERO(setupPaths(ti, &paths));
    // test some operations
    EXPECT_ZERO(doTestHdfsOperations(ti, fs, &paths));
@@ -295,6 +341,8 @@ static int testHdfsOperationsImpl(struct tlhThreadInfo *ti)
    EXPECT_ZERO(hdfsDisconnect(fs));
    // reconnect to do the final delete.
    EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs, NULL));
+    if (!fs)
+        return 1;
    EXPECT_ZERO(hdfsDelete(fs, paths.prefix, 1));
    EXPECT_ZERO(hdfsDisconnect(fs));
    return 0;
@@ -325,7 +373,7 @@ static int checkFailures(struct tlhThreadInfo *ti, int tlhNumThreads)
    for (i = 0; i < tlhNumThreads; i++) {
        if (ti[i].success != 0) {
            fprintf(stderr, "%s%d", sep, i);
-            sep = ", "; 
+            sep = ", ";
        }
    }
    fprintf(stderr, "].  FAILURE.\n");

+ 49 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindCyrusSASL.cmake

@@ -0,0 +1,49 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# - Find Cyrus SASL (sasl.h, libsasl2.so)
+#
+# This module defines
+#  CYRUS_SASL_INCLUDE_DIR, directory containing headers
+#  CYRUS_SASL_SHARED_LIB, path to Cyrus SASL's shared library
+#  CYRUS_SASL_FOUND, whether Cyrus SASL and its plugins have been found
+#
+# N.B: we do _not_ include sasl in thirdparty, for a fairly subtle reason. The
+# TLDR version is that newer versions of cyrus-sasl (>=2.1.26) have a bug fix
+# for https://bugzilla.cyrusimap.org/show_bug.cgi?id=3590, but that bug fix
+# relied on a change both on the plugin side and on the library side. If you
+# then try to run the new version of sasl (e.g from our thirdparty tree) with
+# an older version of a plugin (eg from RHEL6 install), you'll get a SASL_NOMECH
+# error due to this bug.
+#
+# In practice, Cyrus-SASL is so commonly used and generally non-ABI-breaking that
+# we should be OK to depend on the host installation.
+
+# Note that this is modified from the version that was copied from our
+# friends at the Kudu project.  The original version implicitly required
+# the Cyrus SASL.  This version will only complain if REQUIRED is added.
+
+
+find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h)
+find_library(CYRUS_SASL_SHARED_LIB sasl2)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CYRUS_SASL DEFAULT_MSG
+  CYRUS_SASL_SHARED_LIB CYRUS_SASL_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(CYRUS_SASL_INCLUDE_DIR CYRUS_SASL_SHARED_LIB)

+ 44 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindGSasl.cmake

@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# - Try to find the GNU sasl library (gsasl)
+#
+# Once done this will define
+#
+#  GSASL_FOUND - System has gsasl
+#  GSASL_INCLUDE_DIR - The gsasl include directory
+#  GSASL_LIBRARIES - The libraries needed to use gsasl
+#  GSASL_DEFINITIONS - Compiler switches required for using gsasl
+
+
+IF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES)
+  # in cache already
+  SET(GSasl_FIND_QUIETLY TRUE)
+ENDIF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES)
+
+FIND_PATH(GSASL_INCLUDE_DIR gsasl.h)
+
+FIND_LIBRARY(GSASL_LIBRARIES gsasl)
+
+INCLUDE(FindPackageHandleStandardArgs)
+
+# handle the QUIETLY and REQUIRED arguments and set GSASL_FOUND to TRUE if
+# all listed variables are TRUE
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(GSASL DEFAULT_MSG GSASL_LIBRARIES GSASL_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(GSASL_INCLUDE_DIR GSASL_LIBRARIES)

+ 297 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt

@@ -0,0 +1,297 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# If cmake variable HDFSPP_LIBRARY_ONLY is set, then tests, examples, and
+# tools will not be built. This allows for faster builds of the libhdfspp
+# library alone, avoids looking for a JDK, valgrind, and gmock, and
+# prevents the generation of multiple binaries that might not be relevant
+# to other projects during normal use.
+# Example of cmake invocation with HDFSPP_LIBRARY_ONLY enabled:
+# cmake -DHDFSPP_LIBRARY_ONLY=1
+
+project (libhdfspp)
+
+cmake_minimum_required(VERSION 2.8)
+
+enable_testing()
+include (CTest)
+
+SET(BUILD_SHARED_HDFSPP TRUE CACHE STRING "BUILD_SHARED_HDFSPP defaulting to 'TRUE'")
+SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
+
+# If there's a better way to inform FindCyrusSASL.cmake, let's make this cleaner:
+SET(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${CYRUS_SASL_DIR};${GSASL_DIR};$ENV{PROTOBUF_HOME}")
+
+# Specify PROTOBUF_HOME so that find_package picks up the correct version
+SET(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};$ENV{PROTOBUF_HOME}")
+
+find_package(Doxygen)
+find_package(OpenSSL REQUIRED)
+find_package(Protobuf REQUIRED)
+find_package(CyrusSASL)
+find_package(GSasl)
+find_package(Threads)
+
+include(CheckCXXSourceCompiles)
+
+# Check if thread_local is supported
+unset (THREAD_LOCAL_SUPPORTED CACHE)
+set (CMAKE_REQUIRED_DEFINITIONS "-std=c++11")
+set (CMAKE_REQUIRED_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+check_cxx_source_compiles(
+    "#include <thread>
+    int main(void) {
+      thread_local int s;
+      return 0;
+    }"
+    THREAD_LOCAL_SUPPORTED)
+if (NOT THREAD_LOCAL_SUPPORTED)
+  message(FATAL_ERROR
+  "FATAL ERROR: The required feature thread_local storage is not supported by your compiler. \
+  Known compilers that support this feature: GCC, Visual Studio, Clang (community version), \
+  Clang (version for iOS 9 and later).")
+endif (NOT THREAD_LOCAL_SUPPORTED)
+
+# Check if PROTOC library was compiled with the compatible compiler by trying
+# to compile some dummy code
+unset (PROTOC_IS_COMPATIBLE CACHE)
+set (CMAKE_REQUIRED_INCLUDES ${PROTOBUF_INCLUDE_DIRS})
+set (CMAKE_REQUIRED_LIBRARIES ${PROTOBUF_LIBRARY} ${PROTOBUF_PROTOC_LIBRARY})
+check_cxx_source_compiles(
+    "#include <google/protobuf/io/printer.h>
+    #include <string>
+    int main(void) {
+      ::google::protobuf::io::ZeroCopyOutputStream *out = NULL;
+      ::google::protobuf::io::Printer printer(out, '$');
+      printer.PrintRaw(std::string(\"test\"));
+      return 0;
+    }"
+    PROTOC_IS_COMPATIBLE)
+if (NOT PROTOC_IS_COMPATIBLE)
+  message(WARNING
+  "WARNING: the Protocol Buffers Library and the Libhdfs++ Library must both be compiled \
+  with the same (or compatible) compiler. Normally only the same major versions of the same \
+  compiler are compatible with each other.")
+endif (NOT PROTOC_IS_COMPATIBLE)
+
+find_program(MEMORYCHECK_COMMAND valgrind HINTS ${VALGRIND_DIR} )
+set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full --error-exitcode=1")
+message(STATUS "valgrind location: ${MEMORYCHECK_COMMAND}")
+
+if (REQUIRE_VALGRIND AND MEMORYCHECK_COMMAND MATCHES "MEMORYCHECK_COMMAND-NOTFOUND" )
+  message(FATAL_ERROR "valgrind was required but not found.  "
+                      "The path can be included via a -DVALGRIND_DIR=... flag passed to CMake.")
+endif (REQUIRE_VALGRIND AND MEMORYCHECK_COMMAND MATCHES "MEMORYCHECK_COMMAND-NOTFOUND" )
+
+# Find the SASL library to use.  If you don't want to require a sasl library,
+#    define -DNO_SASL=1 in your cmake call
+# Prefer Cyrus SASL, but use GSASL if it is found
+# Note that the packages can be disabled by setting CMAKE_DISABLE_FIND_PACKAGE_GSasl or
+#    CMAKE_DISABLE_FIND_PACKAGE_CyrusSASL, respectively (case sensitive)
+set (SASL_LIBRARIES)
+set (SASL_INCLUDE_DIR)
+if (NOT NO_SASL)
+    if (CYRUS_SASL_FOUND)
+        message(STATUS "Using Cyrus SASL; link with ${CYRUS_SASL_SHARED_LIB}")
+        set (SASL_INCLUDE_DIR ${CYRUS_SASL_INCLUDE_DIR})
+        set (SASL_LIBRARIES ${CYRUS_SASL_SHARED_LIB})
+        set (CMAKE_USING_CYRUS_SASL 1)
+        add_definitions(-DUSE_SASL -DUSE_CYRUS_SASL)
+    else (CYRUS_SASL_FOUND)
+        if (REQUIRE_CYRUS_SASL)
+          message(FATAL_ERROR "Cyrus SASL was required but not found.  "
+                                "The path can be included via a -DCYRUS_SASL_DIR=... flag passed to CMake.")
+        endif (REQUIRE_CYRUS_SASL)
+
+        # If we didn't pick Cyrus, use GSASL instead
+        if (GSASL_FOUND)
+          message(STATUS "Using GSASL; link with ${GSASL_LIBRARIES}")
+          set (SASL_INCLUDE_DIR ${GSASL_INCLUDE_DIR})
+          set (SASL_LIBRARIES ${GSASL_LIBRARIES})
+          set (CMAKE_USING_GSASL 1)
+          add_definitions(-DUSE_SASL -DUSE_GSASL)
+        else (GSASL_FOUND)
+          if (REQUIRE_GSASL)
+            message(FATAL_ERROR "GSASL was required but not found.  "
+                                "The path can be included via a -DGSASL_DIR=... flag passed to CMake.")
+          endif (REQUIRE_GSASL)
+
+          # No SASL was found, but NO_SASL was not defined
+          message(FATAL_ERROR "Cound not find a SASL library (GSASL (gsasl) or Cyrus SASL (libsasl2).  "
+                            "Install/configure one of them or define NO_SASL=1 in your cmake call")
+        endif (GSASL_FOUND)
+    endif (CYRUS_SASL_FOUND)
+else (NOT NO_SASL)
+    message(STATUS "Compiling with NO SASL SUPPORT")
+endif (NOT NO_SASL)
+
+add_definitions(-DASIO_STANDALONE -DASIO_CPP11_DATE_TIME)
+
+# Disable optimizations if compiling debug
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
+
+if(UNIX)
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -std=c++11 -g -fPIC -fno-strict-aliasing")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -fPIC -fno-strict-aliasing")
+endif()
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+    add_definitions(-DASIO_HAS_STD_ADDRESSOF -DASIO_HAS_STD_ARRAY -DASIO_HAS_STD_ATOMIC -DASIO_HAS_CSTDINT -DASIO_HAS_STD_SHARED_PTR -DASIO_HAS_STD_TYPE_TRAITS -DASIO_HAS_VARIADIC_TEMPLATES -DASIO_HAS_STD_FUNCTION -DASIO_HAS_STD_CHRONO -DASIO_HAS_STD_SYSTEM_ERROR)
+endif ()
+
+# Mac OS 10.7 and later deprecates most of the methods in OpenSSL.
+# Add -Wno-deprecated-declarations to avoid the warnings.
+if(APPLE)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -Wno-deprecated-declarations -Wno-unused-local-typedef")
+endif()
+
+if(DOXYGEN_FOUND)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/doc/Doxyfile @ONLY)
+add_custom_target(doc ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/doc/Doxyfile
+                  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+                  COMMENT "Generating API documentation with Doxygen" VERBATIM)
+endif(DOXYGEN_FOUND)
+
+
+# Copy files from the hadoop tree into the output/extern directory if
+#    they've changed
+function (copy_on_demand input_src_glob input_dest_dir)
+  get_filename_component(src_glob ${input_src_glob} REALPATH)
+  get_filename_component(dest_dir ${input_dest_dir} REALPATH)
+  get_filename_component(src_dir ${src_glob} PATH)
+  message(STATUS "Syncing ${src_glob} to ${dest_dir}")
+
+  file(GLOB_RECURSE src_files ${src_glob})
+  foreach(src_path ${src_files})
+    file(RELATIVE_PATH relative_src ${src_dir} ${src_path})
+    set(dest_path "${dest_dir}/${relative_src}")
+    add_custom_command(TARGET copy_hadoop_files
+     COMMAND ${CMAKE_COMMAND} -E copy_if_different "${src_path}" "${dest_path}"
+    )
+  endforeach()
+endfunction()
+
+# If we're building in the hadoop tree, pull the Hadoop files that
+#     libhdfspp depends on.  This allows us to ensure that
+#     the distribution will have a consistent set of headers and
+#     .proto files
+if(HADOOP_BUILD)
+    set(HADOOP_IMPORT_DIR ${PROJECT_BINARY_DIR}/extern)
+    get_filename_component(HADOOP_IMPORT_DIR ${HADOOP_IMPORT_DIR} REALPATH)
+
+  add_custom_target(copy_hadoop_files ALL)
+
+  # Gather the Hadoop files and resources that libhdfs++ needs to build
+  copy_on_demand(../libhdfs/include/*.h* ${HADOOP_IMPORT_DIR}/include)
+  copy_on_demand(${CMAKE_CURRENT_LIST_DIR}/../../../../../hadoop-hdfs-client/src/main/proto/*.proto ${HADOOP_IMPORT_DIR}/proto/hdfs)
+  copy_on_demand(${CMAKE_CURRENT_LIST_DIR}/../../../../../../hadoop-common-project/hadoop-common/src/main/proto/*.proto  ${HADOOP_IMPORT_DIR}/proto/hadoop)
+  copy_on_demand(${CMAKE_CURRENT_LIST_DIR}/../../../../../../hadoop-common-project/hadoop-common/src/test/proto/*.proto  ${HADOOP_IMPORT_DIR}/proto/hadoop_test)
+else(HADOOP_BUILD)
+  set(HADOOP_IMPORT_DIR ${CMAKE_CURRENT_LIST_DIR}/extern)
+endif(HADOOP_BUILD)
+
+# Paths to find the imported files
+set(PROTO_HDFS_DIR         ${HADOOP_IMPORT_DIR}/proto/hdfs)
+set(PROTO_HADOOP_DIR       ${HADOOP_IMPORT_DIR}/proto/hadoop)
+set(PROTO_HADOOP_TEST_DIR  ${HADOOP_IMPORT_DIR}/proto/hadoop_test)
+
+include_directories(
+  include
+  lib
+  ${HADOOP_IMPORT_DIR}/include
+)
+
+include_directories( SYSTEM
+  ${PROJECT_BINARY_DIR}/lib/proto
+  third_party/asio-1.10.2/include
+  third_party/rapidxml-1.13
+  third_party/gmock-1.7.0
+  third_party/tr2
+  third_party/protobuf
+  third_party/uriparser2
+  ${OPENSSL_INCLUDE_DIR}
+  ${SASL_INCLUDE_DIR}
+  ${PROTOBUF_INCLUDE_DIRS}
+)
+
+
+add_subdirectory(third_party/gmock-1.7.0)
+add_subdirectory(third_party/uriparser2)
+add_subdirectory(lib)
+if(NOT HDFSPP_LIBRARY_ONLY)
+    add_subdirectory(tests)
+    add_subdirectory(examples)
+    add_subdirectory(tools)
+endif()
+
+# create an empty file; hadoop_add_dual_library wraps add_library which
+# requires at least one file as an argument
+set(EMPTY_FILE_CC ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/empty.cc)
+file(WRITE ${EMPTY_FILE_CC} "")
+
+# Build the output libraries
+if(NEED_LINK_DL)
+   set(LIB_DL dl)
+endif()
+
+set(LIBHDFSPP_VERSION "0.1.0")
+set(LIBHDFSPP_ALL_OBJECTS $<TARGET_OBJECTS:bindings_c_obj> $<TARGET_OBJECTS:fs_obj> $<TARGET_OBJECTS:rpc_obj> $<TARGET_OBJECTS:reader_obj> $<TARGET_OBJECTS:proto_obj> $<TARGET_OBJECTS:connection_obj> $<TARGET_OBJECTS:common_obj> $<TARGET_OBJECTS:uriparser2_obj>)
+if (HADOOP_BUILD)
+  hadoop_add_dual_library(hdfspp ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS})
+  hadoop_target_link_dual_libraries(hdfspp
+    ${LIB_DL}
+    ${PROTOBUF_LIBRARY}
+    ${OPENSSL_LIBRARIES}
+    ${SASL_LIBRARIES}
+    ${CMAKE_THREAD_LIBS_INIT}
+  )
+  set_target_properties(hdfspp PROPERTIES SOVERSION ${LIBHDFSPP_VERSION})
+else (HADOOP_BUILD)
+  add_library(hdfspp_static STATIC ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS})
+  target_link_libraries(hdfspp_static
+    ${LIB_DL}
+    ${PROTOBUF_LIBRARY}
+    ${OPENSSL_LIBRARIES}
+    ${SASL_LIBRARIES}
+    ${CMAKE_THREAD_LIBS_INIT}
+    )
+  if(BUILD_SHARED_HDFSPP)
+    add_library(hdfspp SHARED ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS})
+    set_target_properties(hdfspp PROPERTIES SOVERSION ${LIBHDFSPP_VERSION})
+  endif(BUILD_SHARED_HDFSPP)
+endif (HADOOP_BUILD)
+
+# Set up make install targets
+# Can be installed to a particular location via "make DESTDIR=... install"
+file(GLOB_RECURSE LIBHDFSPP_HEADER_FILES "${CMAKE_CURRENT_LIST_DIR}/include/*.h*")
+file(GLOB_RECURSE LIBHDFS_HEADER_FILES "${HADOOP_IMPORT_DIR}/include/*.h*")
+install(FILES ${LIBHDFSPP_HEADER_FILES} DESTINATION include/hdfspp)
+install(FILES ${LIBHDFS_HEADER_FILES} DESTINATION include/hdfs)
+
+install(TARGETS hdfspp_static ARCHIVE DESTINATION lib)
+if(BUILD_SHARED_HDFSPP)
+  install(TARGETS hdfspp LIBRARY DESTINATION lib)
+endif(BUILD_SHARED_HDFSPP)
+
+add_custom_target(
+    InstallToBuildDirectory
+    COMMAND "${CMAKE_MAKE_PROGRAM}" install DESTDIR=${PROJECT_BINARY_DIR}/output
+)
+set(LIBHDFSPP_DIR ${PROJECT_BINARY_DIR}/output)
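
The flags referenced in the comments of this CMakeLists.txt (HDFSPP_LIBRARY_ONLY, NO_SASL, VALGRIND_DIR, CYRUS_SASL_DIR, GSASL_DIR) can be combined on the cmake command line for a standalone, library-only configure step. A minimal sketch, assuming an out-of-tree build directory; only the flag names come from the file above, the directory layout is illustrative:

``` shell
# Hypothetical paths; the -D flag names are taken from the CMakeLists.txt above.
mkdir -p build && cd build
cmake ../hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp \
      -DHDFSPP_LIBRARY_ONLY=1 -DNO_SASL=1
make
```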

+ 161 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CONTRIBUTING.md

@@ -0,0 +1,161 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+Libhdfs++ Coding Standards
+==========================
+
+* Libhdfs++ Coding Standards
+    * Introduction
+    * Automated Formatting
+    * Explicit Scoping
+    * Comments
+    * Portability
+
+
+Introduction
+------------
+
+The foundation of the libhdfs++ project's coding standards
+is Google's C++ style guide. It can be found here:
+
+<a href="https://google.github.io/styleguide/cppguide.html">https://google.github.io/styleguide/cppguide.html</a>
+
+Several small restrictions adopted from Sun's Java standards and
+Hadoop convention apply on top of Google's, along with the
+portability requirements described below.
+
+Automated Formatting
+--------------------
+
+Prior to submitting a patch for code review, run LLVM's formatting tool, clang-format, on the .h, .c, and .cc files included in the patch.  Use the -style=google switch when doing so.
+
+Example presubmission usage:
+
+``` shell
+cat my_source_file.cc | clang-format -style=google > temp_file.cc
+#optionally diff the source and temp file to get an idea what changed
+mv temp_file.cc my_source_file.cc
+```
+
+* note: On some Linux distributions clang-format already exists in the repositories but doesn't show up without an appended version number.  On Ubuntu you'll find it with:
+``` shell
+   "apt-get install clang-format-3.6"
+```
+
+Explicit Block Scopes
+---------------------
+
+Always add brackets around conditional and loop bodies, even if the body could fit on a single line.
+
+__BAD__:
+``` c
+if (foo)
+  Bar();
+
+if (foo)
+  Bar();
+else
+  Baz();
+
+for (int i=0; i<10; i++)
+  Bar(i);
+```
+__GOOD__:
+``` c
+if (foo) {
+  Bar();
+}
+
+if (foo) {
+  Bar();
+} else {
+  Baz();
+}
+
+for (int i=0; i<10; i++) {
+  Bar(i);
+}
+```
+
+Comments
+--------
+
+Use the /\* comment \*/ style to maintain consistency with the rest of the Hadoop code base.
+
+__BAD__:
+``` c
+//this is a bad single line comment
+/*
+  this is a bad block comment
+*/
+```
+__GOOD__:
+``` c
+/* this is a single line comment */
+
+/**
+ * This is a block comment.  Note that nothing is on the first
+ * line of the block.
+ **/
+```
+
+Portability
+-----------
+
+Please make sure you write code that is portable.
+
+* All code must be able to build using GCC and LLVM.
+    * In the future we hope to support other compilers as well.
+* Don't make assumptions about endianness or architecture.
+    * Don't do clever things with pointers or intrinsics.
+* Don't write code that could force a non-aligned word access.
+    * This causes performance issues on most architectures and isn't supported at all on some.
+    * Generally the compiler will prevent this unless you are doing clever things with pointers e.g. abusing placement new or reinterpreting a pointer into a pointer to a wider type.
+* If a type needs to be a specific width, make sure to specify it.
+    * `int32_t my_32_bit_wide_int`
+* Avoid using compiler dependent pragmas or attributes.
+    * If there is a justified and unavoidable reason for using these you must document why. See examples below.
+
+__BAD__:
+``` c
+struct Foo {
+  int32_t x_;
+  char y_;
+  int32_t z_;
+  char w_;
+} __attribute__((packed));
+/**
+ * "I didn't profile and identify that this is causing
+ * significant memory overhead but I want to pack it to
+ * save 6 bytes"
+ **/
+```
+__NECESSARY__: Still not good but required for short-circuit reads.
+``` c
+struct FileDescriptorMessage {
+  struct cmsghdr msg_;
+  int file_descriptors_[2];
+} __attribute__((packed));
+/**
+ * This is actually needed for short circuit reads.
+ * "struct cmsghdr" is well defined on UNIX systems.
+ * This mechanism relies on the fact that any passed
+ * ancillary data is _directly_ following the cmsghdr.
+ * The kernel interprets any padding as real data.
+ **/
+```

+ 35 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/Doxyfile.in

@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+DOXYFILE_ENCODING      = UTF-8
+PROJECT_NAME           = "libhdfspp"
+OUTPUT_DIRECTORY       = doc
+TAB_SIZE               = 2
+MARKDOWN_SUPPORT       = YES
+BUILTIN_STL_SUPPORT    = YES
+
+
+INPUT                  = @PROJECT_SOURCE_DIR@/doc/mainpage.dox \
+                         @PROJECT_SOURCE_DIR@/include/libhdfspp \
+                         @PROJECT_SOURCE_DIR@/lib/common/continuation \
+
+INPUT_ENCODING         = UTF-8
+RECURSIVE              = NO
+
+GENERATE_HTML          = YES
+GENERATE_LATEX         = NO

+ 5 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidAllocationTagException.java → hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/mainpage.dox

@@ -16,19 +16,11 @@
 * limitations under the License.
 */

-package org.apache.hadoop.yarn.exceptions;
-
/**
- * This exception is thrown by
- * {@link
- * org.apache.hadoop.yarn.api.records.AllocationTagNamespace#parse(String)}
- * when it fails to parse a namespace.
- */
-public class InvalidAllocationTagException extends YarnException {
+\mainpage libhdfs++

-  private static final long serialVersionUID = 1L;
+libhdfs++ is a modern implementation of HDFS client in C++11. It is
+optimized for the Massive Parallel Processing (MPP) applications that
+access thousands of files concurrently in HDFS.

-  public InvalidAllocationTagException(String message) {
-    super(message);
-  }
-}
+*/

+ 20 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/CMakeLists.txt

@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(c)
+add_subdirectory(cc)

+ 20 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/CMakeLists.txt

@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(cat)
+add_subdirectory(connect_cancel)

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(cat_c cat.c)
+target_link_libraries(cat_c hdfspp_static uriparser2)

+ 121 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/cat.c

@@ -0,0 +1,121 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/*
+  A stripped-down version of Unix's "cat".
+  It doesn't deal with any flags for now; it just attempts to read the whole file.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hdfspp/hdfs_ext.h"
+#include "uriparser2/uriparser2.h"
+#include "common/util_c.h"
+
+#define SCHEME "hdfs"
+#define BUF_SIZE 1048576 //1 MB
+static char input_buffer[BUF_SIZE];
+
+int main(int argc, char** argv) {
+
+  char error_text[1024];
+  if (argc != 2) {
+    fprintf(stderr, "usage: cat [hdfs://[<hostname>:<port>]]/<path-to-file>\n");
+    return 1;
+  }
+
+  URI * uri = NULL;
+  const char * uri_path = argv[1];
+
+  //Separate check for scheme is required, otherwise uriparser2.h library causes memory issues under valgrind
+  const char * scheme_end = strstr(uri_path, "://");
+  if (scheme_end) {
+    if (strncmp(uri_path, SCHEME, strlen(SCHEME)) != 0) {
+      fprintf(stderr, "Scheme %.*s:// is not supported.\n", (int) (scheme_end - uri_path), uri_path);
+      return 1;
+    } else {
+      uri = uri_parse(uri_path);
+    }
+  }
+  if (!uri) {
+    fprintf(stderr, "Malformed URI: %s\n", uri_path);
+    return 1;
+  }
+
+  struct hdfsBuilder* builder = hdfsNewBuilder();
+  if (uri->host)
+    hdfsBuilderSetNameNode(builder, uri->host);
+  if (uri->port != 0)
+    hdfsBuilderSetNameNodePort(builder, uri->port);
+
+  hdfsFS fs = hdfsBuilderConnect(builder);
+  if (fs == NULL) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    const char * host = uri->host ? uri->host : "<default>";
+    int port = uri->port;
+    if (port == 0)
+      port = 8020;
+    fprintf(stderr, "Unable to connect to %s:%d, hdfsConnect returned null.\n%s\n",
+            host, port, error_text);
+    return 1;
+  }
+
+  hdfsFile file = hdfsOpenFile(fs, uri->path, 0, 0, 0, 0);
+  if (NULL == file) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    fprintf(stderr, "Unable to open file %s: %s\n", uri->path, error_text );
+    hdfsDisconnect(fs);
+    hdfsFreeBuilder(builder);
+    return 1;
+  }
+
+  ssize_t read_bytes_count = 0;
+  ssize_t last_read_bytes = 0;
+
+  while (0 < (last_read_bytes =
+                  hdfsPread(fs, file, read_bytes_count, input_buffer, sizeof(input_buffer)))) {
+    fwrite(input_buffer, last_read_bytes, 1, stdout);
+    read_bytes_count += last_read_bytes;
+  }
+
+  int res = 0;
+  res = hdfsCloseFile(fs, file);
+  if (0 != res) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    fprintf(stderr, "Error closing file: %s\n", error_text);
+    hdfsDisconnect(fs);
+    hdfsFreeBuilder(builder);
+    return 1;
+  }
+
+  res = hdfsDisconnect(fs);
+  if (0 != res) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    fprintf(stderr, "Error disconnecting filesystem: %s", error_text);
+    hdfsFreeBuilder(builder);
+    return 1;
+  }
+
+  hdfsFreeBuilder(builder);
+  free(uri);
+  // Clean up static data and prevent valgrind memory leaks
+  ShutdownProtobufLibrary_C();
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(connect_cancel_c connect_cancel.c)
+target_link_libraries(connect_cancel_c hdfspp_static uriparser2)

+ 107 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/connect_cancel.c

@@ -0,0 +1,107 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/*
+  Attempts to connect to a cluster; use Control-C to bail out if it takes a while.
+  A valid config must be present in the directory named by the environment variable $HADOOP_CONF_DIR.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "hdfspp/hdfs_ext.h"
+#include "common/util_c.h"
+
+#define ERROR_BUFFER_SIZE 1024
+
+// Global so signal handler can get at it
+hdfsFS fs = NULL;
+
+const char *catch_enter  = "In signal handler, going to try and cancel.\n";
+const char *catch_cancel = "hdfsCancelPendingConnect has been canceled in the signal handler.\n";
+const char *catch_exit   = "Exiting the signal handler.\n";
+
+// Print to stdout without calling malloc or otherwise indirectly modifying userspace state.
+// Write calls to stdout may still interleave with stuff coming from elsewhere.
+static void sighandler_direct_stdout(const char *msg) {
+  if(!msg)
+    return;
+  ssize_t res = write(1 /*posix stdout fd*/, msg, strlen(msg));
+  (void)res;
+}
+
+static void sig_catch(int val) {
+  // Beware of calling things that aren't reentrant e.g. malloc while in a signal handler.
+  sighandler_direct_stdout(catch_enter);
+
+  if(fs) {
+    hdfsCancelPendingConnection(fs);
+    sighandler_direct_stdout(catch_cancel);
+  }
+  sighandler_direct_stdout(catch_exit);
+}
+
+
+int main(int argc, char** argv) {
+  hdfsSetLoggingLevel(HDFSPP_LOG_LEVEL_INFO);
+  signal(SIGINT, sig_catch);
+
+  char error_text[ERROR_BUFFER_SIZE];
+  if (argc != 1) {
+    fprintf(stderr, "usage: ./connect_cancel_c\n");
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  const char *hdfsconfdir = getenv("HADOOP_CONF_DIR");
+  if(!hdfsconfdir) {
+    fprintf(stderr, "$HADOOP_CONF_DIR must be set\n");
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  struct hdfsBuilder* builder = hdfsNewBuilderFromDirectory(hdfsconfdir);
+
+  fs = hdfsAllocateFileSystem(builder);
+  if (fs == NULL) {
+    hdfsGetLastError(error_text, ERROR_BUFFER_SIZE);
+    fprintf(stderr, "hdfsAllocateFileSystem returned null.\n%s\n", error_text);
+    hdfsFreeBuilder(builder);
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  int connected = hdfsConnectAllocated(fs, builder);
+  if (connected != 0) {
+    hdfsGetLastError(error_text, ERROR_BUFFER_SIZE);
+    fprintf(stderr, "hdfsConnectAllocated errored.\n%s\n", error_text);
+    hdfsFreeBuilder(builder);
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  hdfsDisconnect(fs);
+  hdfsFreeBuilder(builder);
+  // Clean up static data and prevent valgrind memory leaks
+  ShutdownProtobufLibrary_C();
+  return 0;
+}

+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/CMakeLists.txt

@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+include_directories( ../../tools )
+
+add_subdirectory(cat)
+add_subdirectory(gendirs)
+add_subdirectory(find)
+add_subdirectory(connect_cancel)

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(cat cat.cc)
+target_link_libraries(cat tools_common hdfspp_static)

+ 89 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/cat.cc

@@ -0,0 +1,89 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/**
+   * Unix-like cat tool example.
+   *
+   * Reads the specified file from HDFS and outputs to stdout.
+   *
+   * Usage: cat /<path-to-file>
+   *
+   * Example: cat /dir/file
+   *
+   * @param path-to-file    Absolute path to the file to read.
+   *
+   **/
+
+#include "hdfspp/hdfspp.h"
+#include <google/protobuf/stubs/common.h>
+#include "tools_common.h"
+
+const std::size_t BUF_SIZE = 1048576; //1 MB
+static char input_buffer[BUF_SIZE];
+
+int main(int argc, char *argv[]) {
+  if (argc != 2) {
+    std::cerr << "usage: cat /<path-to-file>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  std::string path = argv[1];
+
+  //Building a URI object from the given uri path
+  hdfs::URI uri = hdfs::parse_path_or_exit(path);
+
+  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, false);
+  if (!fs) {
+    std::cerr << "Could not connect the file system. " << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  hdfs::FileHandle *file_raw = nullptr;
+  hdfs::Status status = fs->Open(path, &file_raw);
+  if (!status.ok()) {
+    std::cerr << "Could not open file " << path << ". " << status.ToString() << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  //wrapping file_raw into a unique pointer to guarantee deletion
+  std::unique_ptr<hdfs::FileHandle> file(file_raw);
+
+  ssize_t total_bytes_read = 0;
+  size_t last_bytes_read = 0;
+
+  do{
+    //Reading file chunks
+    status = file->Read(input_buffer, sizeof(input_buffer), &last_bytes_read);
+    if(status.ok()) {
+      //Writing file chunks to stdout
+      fwrite(input_buffer, last_bytes_read, 1, stdout);
+      total_bytes_read += last_bytes_read;
+    } else {
+      if(status.is_invalid_offset()){
+        //Reached the end of the file
+        break;
+      } else {
+        std::cerr << "Error reading the file: " << status.ToString() << std::endl;
+        exit(EXIT_FAILURE);
+      }
+    }
+  } while (last_bytes_read > 0);
+
+  // Clean up static data and prevent valgrind memory leaks
+  google::protobuf::ShutdownProtobufLibrary();
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(connect_cancel connect_cancel.cc)
+target_link_libraries(connect_cancel hdfspp_static)

+ 154 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/connect_cancel.cc

@@ -0,0 +1,154 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+
+#include "hdfspp/hdfspp.h"
+#include "common/hdfs_configuration.h"
+#include "common/configuration_loader.h"
+
+#include <google/protobuf/stubs/common.h>
+
+#include <signal.h>
+#include <unistd.h>
+
+#include <thread>
+#include <iostream>
+
+// Simple example of how to cancel an async connect call.
+// Here Control-C (SIGINT) is caught in order to invoke the FS level cancel and
+// properly tear down the process.  Valgrind should show no leaked memory on exit
+// when cancel has been called.  URI parsing code is omitted and defaultFs from
+// /etc/hadoop/conf or $HADOOP_CONF_DIR is always used.
+
+// Scoped globally to make it simple to reference from the signal handler.
+std::shared_ptr<hdfs::FileSystem> fs;
+
+const std::string catch_enter("In signal handler, going to try and cancel FileSystem::Connect.\n");
+const std::string catch_cancel("FileSystem::Cancel has been canceled in the signal handler.\n");
+const std::string catch_exit("Exiting the signal handler.\n");
+
+// Avoid IO reentrancy issues, see comments in signal handler below.
+// It's possible that the write interleaves with another write call,
+// but it won't corrupt the stack or heap.
+static void sighandler_direct_stdout(const std::string &msg) {
+  ssize_t res = ::write(1 /*posix stdout FD*/, msg.data(), msg.size());
+  // In production you'd want to check res, but error handling code will
+  // need to be fairly application specific if it's going to properly
+  // avoid reentrant calls to malloc.
+  (void)res;
+}
+
+// Signal handler to make a SIGINT call cancel rather than exit().
+static void sig_catch(int val) {
+  (void)val;
+  // This is avoiding the tricky bits of signal handling, notably that the
+  // underlying string manipulation and IO functions used by the logger
+  // are unlikely to be reentrant.
+  //
+  // Production code could mask out all logging on handler entry and enable
+  // it again on exit; here we just assume it's "good enough" and some
+  // (possibly broken) log messages are better than none.
+
+  sighandler_direct_stdout(catch_enter);
+  if(fs) {
+    // This will invoke the callback immediately with an OperationCanceled status
+    fs->CancelPendingConnect();
+    sighandler_direct_stdout(catch_cancel);
+  }
+  sighandler_direct_stdout(catch_exit);
+}
+
+
+int main(int arg_token_count, const char **args) {
+  (void)args;
+  if(arg_token_count != 1) {
+    std::cerr << "usage: ./connect_cancel";
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  // Register signal handle to asynchronously invoke cancel from outside the main thread.
+  signal(SIGINT, sig_catch);
+
+  // Generic setup/config code much like the other examples.
+  hdfs::Options options;
+  //Setting the config path to the default: "$HADOOP_CONF_DIR" or "/etc/hadoop/conf"
+  hdfs::ConfigurationLoader loader;
+  //Loading default config files core-site.xml and hdfs-site.xml from the config path
+  hdfs::optional<hdfs::HdfsConfiguration> config = loader.LoadDefaultResources<hdfs::HdfsConfiguration>();
+  //TODO: HDFS-9539 - after this is resolved, valid config will always be returned.
+  if(config){
+    //Loading options from the config
+    options = config->GetOptions();
+  }
+
+
+  // Start an IoService and some worker threads
+  std::shared_ptr<hdfs::IoService> service = hdfs::IoService::MakeShared();
+  if(nullptr == service) {
+    std::cerr << "Unable to create IoService" << std::endl;
+    fs.reset();
+    // Nasty hack to clean up for valgrind since we don't have the C++17 optional<T>::reset method
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  unsigned int worker_count = service->InitDefaultWorkers();
+  if(worker_count < 1) {
+    std::cerr << "Unable to create IoService worker threads";
+    fs.reset();
+    service->Stop();
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  // Set up and connect to the FileSystem
+  fs.reset(hdfs::FileSystem::New(service, "", options));
+  if(nullptr == fs) {
+    std::cerr << "Unable to create FileSystem" << std::endl;
+    fs.reset();
+    service->Stop();
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  hdfs::Status status = fs->ConnectToDefaultFs();
+  if (!status.ok()) {
+    if(!options.defaultFS.get_host().empty()){
+      std::cerr << "Error connecting to " << options.defaultFS << ". " << status.ToString() << std::endl;
+    } else {
+      std::cerr << "Error connecting to the cluster: defaultFS is empty. " << status.ToString() << std::endl;
+    }
+    fs.reset();
+    service->Stop();
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  fs.reset();
+  service->Stop();
+  config = decltype(config)();
+  google::protobuf::ShutdownProtobufLibrary();
+
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(find find.cc)
+target_link_libraries(find tools_common hdfspp_static)

+ 140 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/find.cc

@@ -0,0 +1,140 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/**
+   * A parallel find tool example.
+   *
+   * Finds all files matching the specified name recursively starting from the
+   * specified directory and prints their filepaths. Works either synchronously
+   * or asynchronously.
+   *
+   * Usage: find /<path-to-file> <file-name> <use_async>
+   *
+   * Example: find /dir?/tree* some?file*name 1
+   *
+   * @param path-to-file    Absolute path at which to begin search, can have wild
+   *                        cards and must be non-blank
+   * @param file-name       Name to find, can have wild cards and must be non-blank
+   * @param use_async       If set to 1 it prints out results asynchronously as
+   *                        they arrive. If set to 0 results are printed in one
+   *                        big chunk when it becomes available.
+   *
+   **/
+
+#include "hdfspp/hdfspp.h"
+#include <google/protobuf/stubs/common.h>
+#include <future>
+#include "tools_common.h"
+
+void SyncFind(std::shared_ptr<hdfs::FileSystem> fs, const std::string &path, const std::string &name){
+  std::vector<hdfs::StatInfo> results;
+  //Synchronous call to Find
+  hdfs::Status stat = fs->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), &results);
+
+  if (!stat.ok()) {
+    std::cerr << "Error: " << stat.ToString() << std::endl;
+  }
+
+  if(results.empty()){
+    std::cout << "Nothing Found" << std::endl;
+  } else {
+    //Printing out the results
+    for (hdfs::StatInfo const& si : results) {
+      std::cout << si.full_path << std::endl;
+    }
+  }
+}
+
+void AsyncFind(std::shared_ptr<hdfs::FileSystem> fs, const std::string &path, const std::string &name){
+  std::promise<void> promise;
+  std::future<void> future(promise.get_future());
+  bool something_found = false;
+  hdfs::Status status = hdfs::Status::OK();
+
+  /**
+    * Keep requesting more until we get the entire listing. Set the promise
+    * when we have the entire listing to stop.
+    *
+    * Find guarantees that the handler will only be called once at a time,
+    * so we do not need any locking here
+    */
+  auto handler = [&promise, &status, &something_found]
+                  (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
+    //Print result chunks as they arrive
+    if(!si.empty()) {
+      something_found = true;
+      for (hdfs::StatInfo const& s : si) {
+        std::cout << s.full_path << std::endl;
+      }
+    }
+    if(!s.ok() && status.ok()){
+      //We make sure we set 'status' only on the first error.
+      status = s;
+    }
+    if (!has_more_results) {
+      promise.set_value();  //set promise
+      return false;         //request stop sending results
+    }
+    return true;  //request more results
+  };
+
+  //Asynchronous call to Find
+  fs->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), handler);
+
+  //block until promise is set
+  future.get();
+  if(!status.ok()) {
+    std::cerr << "Error: " << status.ToString() << std::endl;
+  }
+  if(!something_found){
+    std::cout << "Nothing Found" << std::endl;
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc != 4) {
+    std::cerr << "usage: find /<path-to-file> <file-name> <use_async>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  std::string path = argv[1];
+  std::string name = argv[2];
+  bool use_async = (std::stoi(argv[3]) != 0);
+
+  //Building a URI object from the given uri path
+  hdfs::URI uri = hdfs::parse_path_or_exit(path);
+
+  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
+  if (!fs) {
+    std::cerr << "Could not connect the file system. " << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  if (use_async){
+    //Example of Async find
+    AsyncFind(fs, path, name);
+  } else {
+    //Example of Sync find
+    SyncFind(fs, path, name);
+  }
+
+  // Clean up static data and prevent valgrind memory leaks
+  google::protobuf::ShutdownProtobufLibrary();
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(gendirs gendirs.cc)
+target_link_libraries(gendirs tools_common hdfspp_static)

+ 122 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/gendirs.cc

@@ -0,0 +1,122 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/**
+   * A recursive directory generator tool.
+   *
+   * Generates a directory tree with specified depth and fanout starting from
+   * a given path. Generation is asynchronous.
+   *
+   * Usage:   gendirs /<path-to-dir> <depth> <fanout>
+   *
+   * Example: gendirs /dir0 3 10
+   *
+   * @param path-to-dir   Absolute path to the directory tree root where the
+   *                      directory tree will be generated
+   * @param depth         Depth of the directory tree (number of levels from
+   *                      root to leaves)
+   * @param fanout        Fanout of each directory (number of sub-directories to
+   *                      be created inside each directory except leaf directories)
+   *
+   **/
+
+#include "hdfspp/hdfspp.h"
+#include <google/protobuf/stubs/common.h>
+#include <future>
+#include "tools_common.h"
+
+#define DEFAULT_PERMISSIONS 0755
+
+void GenerateDirectories (std::shared_ptr<hdfs::FileSystem> fs, int depth, int level, int fanout, std::string path, std::vector<std::future<hdfs::Status>> & futures) {
+  //Level contains our current depth in the directory tree
+  if(level < depth) {
+    for(int i = 0; i < fanout; i++){
+      //Recursive calls to cover all possible paths from the root to the leaf nodes
+      GenerateDirectories(fs, depth, level+1, fanout, path + "dir" + std::to_string(i) + "/", futures);
+    }
+  } else {
+    //We have reached the leaf nodes and now start making calls to create directories
+    //We make a promise which will be set when the call finishes and executes our handler
+    auto callstate = std::make_shared<std::promise<hdfs::Status>>();
+    //Extract a future from this promise
+    std::future<hdfs::Status> future(callstate->get_future());
+    //Save this future to the vector of futures which will be used to wait on all promises
+    //after the whole recursion is done
+    futures.push_back(std::move(future));
+    //Create a handler that will be executed when Mkdirs is done
+    auto handler = [callstate](const hdfs::Status &s) {
+      callstate->set_value(s);
+    };
+    //Asynchronous call to create this directory along with all missing parent directories
+    fs->Mkdirs(path, DEFAULT_PERMISSIONS, true, handler);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc != 4) {
+    std::cerr << "usage: gendirs /<path-to-dir> <depth> <fanout>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  std::string path = argv[1];
+  int depth = std::stoi(argv[2]);
+  int fanout = std::stoi(argv[3]);
+
+  //Building a URI object from the given uri path
+  hdfs::URI uri = hdfs::parse_path_or_exit(path);
+
+  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
+  if (!fs) {
+    std::cerr << "Could not connect the file system. " << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  /**
+   * We do not want the recursion to block on anything, therefore we will be
+   * making asynchronous calls recursively, and then just waiting for all
+   * the calls to finish.
+   *
+   * This array of futures will be populated by the recursive function below.
+   * Each new asynchronous Mkdirs call will add a future to this vector, and will
+   * create a promise, which will only be set when the call was completed and
+   * processed. After the whole recursion is complete we will need to wait until
+   * all promises are set before we can exit.
+   **/
+  std::vector<std::future<hdfs::Status>> futures;
+
+  GenerateDirectories(fs, depth, 0, fanout, path + "/", futures);
+
+  /**
+   * We are waiting here until all promises are set, and checking whether
+   * the returned statuses contained any errors.
+   **/
+  for(std::future<hdfs::Status> &fs : futures){
+    hdfs::Status status = fs.get();
+    if (!status.ok()) {
+      std::cerr << "Error: " << status.ToString() << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  std::cout << "All done!" << std::endl;
+
+  // Clean up static data and prevent valgrind memory leaks
+  google::protobuf::ShutdownProtobufLibrary();
+  return 0;
+}

+ 177 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/block_location.h

@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HDFSPP_BLOCK_LOCATION_H
+#define HDFSPP_BLOCK_LOCATION_H
+
+namespace hdfs {
+
+class DNInfo {
+public:
+  DNInfo() : xfer_port_(-1), info_port_(-1), IPC_port_(-1), info_secure_port_(-1) {}
+
+  std::string getHostname() const {
+    return hostname_;
+  }
+
+  void setHostname(const std::string & hostname) {
+    this->hostname_ = hostname;
+  }
+
+  std::string getIPAddr() const {
+    return ip_addr_;
+  }
+
+  void setIPAddr(const std::string & ip_addr) {
+    this->ip_addr_ = ip_addr;
+  }
+
+  std::string getNetworkLocation() const {
+    return network_location_;
+  }
+
+  void setNetworkLocation(const std::string & location) {
+    this->network_location_ = location;
+  }
+
+  int getXferPort() const {
+    return xfer_port_;
+  }
+
+  void setXferPort(int xfer_port) {
+    this->xfer_port_ = xfer_port;
+  }
+
+  int getInfoPort() const {
+    return info_port_;
+  }
+
+  void setInfoPort(int info_port) {
+    this->info_port_ = info_port;
+  }
+
+  int getIPCPort() const {
+    return IPC_port_;
+  }
+
+  void setIPCPort(int IPC_port) {
+    this->IPC_port_ = IPC_port;
+  }
+
+  int getInfoSecurePort() const {
+    return info_secure_port_;
+  }
+
+  void setInfoSecurePort(int info_secure_port) {
+    this->info_secure_port_ = info_secure_port;
+  }
+private:
+  std::string hostname_;
+  std::string ip_addr_;
+  std::string network_location_;
+  int         xfer_port_;
+  int         info_port_;
+  int         IPC_port_;
+  int         info_secure_port_;
+};
+
+class BlockLocation {
+public:
+    bool isCorrupt() const {
+        return corrupt_;
+    }
+
+    void setCorrupt(bool corrupt) {
+        this->corrupt_ = corrupt;
+    }
+
+    int64_t getLength() const {
+        return length_;
+    }
+
+    void setLength(int64_t length) {
+        this->length_ = length;
+    }
+
+    int64_t getOffset() const {
+        return offset_;
+    }
+
+    void setOffset(int64_t offset) {
+        this->offset_ = offset;
+    }
+
+    const std::vector<DNInfo> & getDataNodes() const {
+        return dn_info_;
+    }
+
+    void setDataNodes(const std::vector<DNInfo> & dn_info) {
+        this->dn_info_ = dn_info;
+    }
+
+private:
+    bool corrupt_;
+    int64_t length_;
+    int64_t offset_;  // Offset of the block in the file
+    std::vector<DNInfo> dn_info_; // Info about who stores each block
+};
+
+class FileBlockLocation {
+public:
+  uint64_t getFileLength() {
+    return fileLength_;
+  }
+
+  void setFileLength(uint64_t fileLength) {
+    this->fileLength_ = fileLength;
+  }
+
+  bool isLastBlockComplete() const {
+    return this->lastBlockComplete_;
+  }
+
+  void setLastBlockComplete(bool lastBlockComplete) {
+    this->lastBlockComplete_ = lastBlockComplete;
+  }
+
+  bool isUnderConstruction() const {
+    return underConstruction_;
+  }
+
+  void setUnderConstruction(bool underConstruction) {
+    this->underConstruction_ = underConstruction;
+  }
+
+  const std::vector<BlockLocation> & getBlockLocations() const {
+    return blockLocations_;
+  }
+
+  void setBlockLocations(const std::vector<BlockLocation> & blockLocations) {
+    this->blockLocations_ = blockLocations;
+  }
+private:
+  uint64_t fileLength_;
+  bool     lastBlockComplete_;
+  bool     underConstruction_;
+  std::vector<BlockLocation> blockLocations_;
+};
+
+} // namespace hdfs
+
+
+#endif /* HDFSPP_BLOCK_LOCATION_H */
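
The classes above are plain data holders that a caller normally receives already populated by the library. A minimal editorial sketch (not part of this patch) of walking such a structure, assuming a FileBlockLocation obtained elsewhere, e.g. from a FileSystem block-location query:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "hdfspp/block_location.h"

// Prints every block of a file and the datanodes holding each replica.
// The FileBlockLocation is assumed to have been filled in by libhdfs++.
void PrintBlockLayout(hdfs::FileBlockLocation &layout) {
  std::cout << "file length: " << layout.getFileLength()
            << ", last block complete: " << layout.isLastBlockComplete() << std::endl;
  for (const hdfs::BlockLocation &block : layout.getBlockLocations()) {
    std::cout << "  block at offset " << block.getOffset()
              << ", length " << block.getLength()
              << (block.isCorrupt() ? " (corrupt)" : "") << std::endl;
    for (const hdfs::DNInfo &dn : block.getDataNodes()) {
      std::cout << "    replica on " << dn.getHostname()
                << " (" << dn.getIPAddr() << ":" << dn.getXferPort() << ")" << std::endl;
    }
  }
}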

+ 68 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/config_parser.h

@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_CONFIGPARSER_H_
+#define LIBHDFSPP_CONFIGPARSER_H_
+
+#include "hdfspp/options.h"
+#include "hdfspp/uri.h"
+#include "hdfspp/status.h"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace hdfs {
+
+class ConfigParser {
+ public:
+  ConfigParser();
+  ConfigParser(const std::string& path);
+  ConfigParser(const std::vector<std::string>& configDirectories);
+  ~ConfigParser();
+  ConfigParser(ConfigParser&&);
+  ConfigParser& operator=(ConfigParser&&);
+
+  bool LoadDefaultResources();
+  std::vector<std::pair<std::string, Status> > ValidateResources() const;
+
+  // Return false if value couldn't be found or cast to desired type
+  bool get_int(const std::string& key, int& outval) const;
+  int get_int_or(const std::string& key, const int defaultval) const;
+
+  bool get_string(const std::string& key, std::string& outval) const;
+  std::string get_string_or(const std::string& key, const std::string& defaultval) const;
+
+  bool get_bool(const std::string& key, bool& outval) const;
+  bool get_bool_or(const std::string& key, const bool defaultval) const;
+
+  bool get_double(const std::string& key, double& outval) const;
+  double get_double_or(const std::string& key, const double defaultval) const;
+
+  bool get_uri(const std::string& key, URI& outval) const;
+  URI get_uri_or(const std::string& key, const URI& defaultval) const;
+
+  bool get_options(Options& outval) const;
+  Options get_options_or(const Options& defaultval) const;
+
+ private:
+  class impl;
+  std::unique_ptr<impl> pImpl;
+};
+
+}
+#endif
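
ConfigParser wraps the usual core-site.xml/hdfs-site.xml lookup behind typed getters. A minimal sketch of how it might be used; the config directory and the key below are illustrative assumptions, not something this header prescribes:

#include <iostream>
#include <string>

#include "hdfspp/config_parser.h"

int main() {
  // Point the parser at a config directory; ConfigParser() would use the defaults.
  hdfs::ConfigParser parser("/etc/hadoop/conf");
  if (!parser.LoadDefaultResources()) {
    std::cerr << "could not load core-site.xml/hdfs-site.xml" << std::endl;
    return 1;
  }

  // Typed getters fall back to a caller-supplied default when the key is absent.
  std::string default_fs = parser.get_string_or("fs.defaultFS", "hdfs://localhost:8020");
  std::cout << "fs.defaultFS = " << default_fs << std::endl;

  // Options carries the parsed settings into FileSystem construction.
  hdfs::Options options = parser.get_options_or(hdfs::Options());
  (void)options;
  return 0;
}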

+ 48 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/content_summary.h

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HDFSPP_CONTENT_SUMMARY_H_
+#define HDFSPP_CONTENT_SUMMARY_H_
+
+#include <string>
+
+namespace hdfs {
+
+/**
+ * Content summary is assumed to be unchanging for the duration of the operation
+ */
+struct ContentSummary {
+  uint64_t length;
+  uint64_t filecount;
+  uint64_t directorycount;
+  uint64_t quota;
+  uint64_t spaceconsumed;
+  uint64_t spacequota;
+  std::string path;
+
+  ContentSummary();
+
+  //Converts ContentSummary object to std::string (hdfs_count format)
+  std::string str(bool include_quota) const;
+
+  //Converts ContentSummary object to std::string (hdfs_du format)
+  std::string str_du() const;
+};
+
+}
+
+#endif
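
The struct doubles as the formatting unit for the count/du style tools; a minimal sketch, with made-up numbers, of how the str() helper renders it:

#include <cstdint>
#include <iostream>

#include "hdfspp/content_summary.h"

int main() {
  // Normally filled in by a filesystem query; populated by hand here purely
  // to show the hdfs_count-style rendering the header advertises.
  hdfs::ContentSummary summary;
  summary.length = 4096;
  summary.filecount = 3;
  summary.directorycount = 1;
  summary.path = "/tmp/example";
  std::cout << summary.str(/*include_quota=*/false) << std::endl;
  return 0;
}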

+ 141 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/events.h

@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HDFSPP_EVENTS
+#define HDFSPP_EVENTS
+
+#include "hdfspp/status.h"
+
+#include <functional>
+
+namespace hdfs {
+
+/*
+ * Supported event names.  These names will stay consistent in libhdfs callbacks.
+ *
+ * Other events not listed here may be seen, but they are not stable and
+ * should not be counted on.  May need to be broken up into more components
+ * as more events are added.
+ */
+
+static constexpr const char * FS_NN_CONNECT_EVENT = "NN::connect";
+static constexpr const char * FS_NN_READ_EVENT = "NN::read";
+static constexpr const char * FS_NN_WRITE_EVENT = "NN::write";
+
+static constexpr const char * FILE_DN_CONNECT_EVENT = "DN::connect";
+static constexpr const char * FILE_DN_READ_EVENT = "DN::read";
+static constexpr const char * FILE_DN_WRITE_EVENT = "DN::write";
+
+
+// NN failover event due to issues with the current NN; might be standby, might be dead.
+// Invokes the fs_event_callback using the nameservice name in the cluster string.
+// The uint64_t value argument holds an address that can be reinterpreted as a const char *
+// and provides the full URI of the node the failover will attempt to connect to next.
+static constexpr const char * FS_NN_FAILOVER_EVENT = "NN::failover";
+
+// Invoked when RpcConnection tries to use an empty set of endpoints to figure out
+// which NN in a HA cluster to connect to.
+static constexpr const char * FS_NN_EMPTY_ENDPOINTS_EVENT = "NN::bad_failover::no_endpoints";
+
+// Invoked prior to determining if failed NN rpc calls should be retried or discarded.
+static constexpr const char * FS_NN_PRE_RPC_RETRY_EVENT = "NN::rpc::get_retry_action";
+
+class event_response {
+public:
+  // Helper factories
+  // The default ok response; libhdfspp should continue normally
+  static event_response make_ok() {
+    return event_response(kOk);
+  }
+  static event_response make_caught_std_exception(const char *what) {
+    return event_response(kCaughtStdException, what);
+  }
+  static event_response make_caught_unknown_exception() {
+    return event_response(kCaughtUnknownException);
+  }
+
+  // High level classification of responses
+  enum event_response_type {
+    kOk = 0,
+    // User supplied callback threw.
+    // Std exceptions will copy the what() string
+    kCaughtStdException = 1,
+    kCaughtUnknownException = 2,
+
+    // Responses to be used in testing only
+    kTest_Error = 100
+  };
+
+  event_response_type response_type() { return response_type_; }
+
+private:
+  // Use factories to construct for now
+  event_response();
+  event_response(event_response_type type)
+            : response_type_(type)
+  {
+    if(type == kCaughtUnknownException) {
+      status_ = Status::Exception("c++ unknown exception", "");
+    }
+  }
+  event_response(event_response_type type, const char *what)
+            : response_type_(type),
+              exception_msg_(what==nullptr ? "" : what)
+  {
+    status_ = Status::Exception("c++ std::exception", exception_msg_.c_str());
+  }
+
+
+  event_response_type response_type_;
+
+  // use to hold what str if event handler threw
+  std::string exception_msg_;
+
+
+///////////////////////////////////////////////
+//
+//   Testing support
+//
+// The consumer can simulate errors
+// within libhdfspp by returning a Status from the callback.
+///////////////////////////////////////////////
+public:
+  static event_response test_err(const Status &status) {
+    return event_response(status);
+  }
+
+  Status status() { return status_; }
+
+private:
+  event_response(const Status & status) :
+    response_type_(event_response_type::kTest_Error), status_(status) {}
+
+  Status status_; // To be used with kTest_Error
+};
+
+/* callback signature */
+typedef std::function<event_response (const char * event,
+                                      const char * cluster,
+                                      int64_t value)> fs_event_callback;
+
+typedef std::function<event_response (const char * event,
+                                      const char * cluster,
+                                      const char * file,
+                                      int64_t value)>file_event_callback;
+}
+#endif
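
A minimal sketch of a callback matching the fs_event_callback signature above; what it counts and prints is illustrative, and how the callback gets registered on a FileSystem is outside this header:

#include <atomic>
#include <cstring>
#include <iostream>

#include "hdfspp/events.h"

static std::atomic<int> nn_connects{0};

// Counts NameNode connect events and lets everything else pass through.
hdfs::event_response CountConnects(const char *event, const char *cluster, int64_t value) {
  (void)value;
  if (std::strcmp(event, hdfs::FS_NN_CONNECT_EVENT) == 0) {
    std::cout << "connect to " << cluster << " (#" << ++nn_connects << ")" << std::endl;
  }
  // make_ok() tells libhdfs++ to continue normally.
  return hdfs::event_response::make_ok();
}

// The std::function type the library expects:
hdfs::fs_event_callback callback = CountConnects;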

+ 48 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/fsinfo.h

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HDFSPP_FSINFO_H_
+#define HDFSPP_FSINFO_H_
+
+#include <string>
+
+namespace hdfs {
+
+/**
+ * Information that is assumed to be unchanging about a file system for the duration of
+ * the operations.
+ */
+struct FsInfo {
+
+  uint64_t capacity;
+  uint64_t used;
+  uint64_t remaining;
+  uint64_t under_replicated;
+  uint64_t corrupt_blocks;
+  uint64_t missing_blocks;
+  uint64_t missing_repl_one_blocks;
+  uint64_t blocks_in_future;
+
+  FsInfo();
+
+  //Converts FsInfo object to std::string (hdfs_df format)
+  std::string str(const std::string fs_name) const;
+};
+
+}
+
+#endif

+ 394 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfs_ext.h

@@ -0,0 +1,394 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_HDFS_HDFSEXT
+#define LIBHDFSPP_HDFS_HDFSEXT
+
+#include <hdfspp/log.h>
+
+/* get typedefs and #defines from libhdfs' hdfs.h to stay consistent */
+#include <hdfs/hdfs.h>
+
+/**
+ *  Note: The #defines below are copied directly from libhdfs'
+ *  hdfs.h.  LIBHDFS_EXTERNAL gets explicitly #undefed at the
+ *  end of the file so it must be redefined here.
+ **/
+
+#ifdef WIN32
+    #ifdef LIBHDFS_DLL_EXPORT
+        #define LIBHDFS_EXTERNAL __declspec(dllexport)
+    #elif LIBHDFS_DLL_IMPORT
+        #define LIBHDFS_EXTERNAL __declspec(dllimport)
+    #else
+        #define LIBHDFS_EXTERNAL
+    #endif
+#else
+    #ifdef LIBHDFS_DLL_EXPORT
+        #define LIBHDFS_EXTERNAL __attribute__((visibility("default")))
+    #elif LIBHDFS_DLL_IMPORT
+        #define LIBHDFS_EXTERNAL __attribute__((visibility("default")))
+    #else
+        #define LIBHDFS_EXTERNAL
+    #endif
+#endif
+
+
+/**
+ * Keep C bindings that are libhdfs++ specific in here.
+ **/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ *  Reads the last error, if any, that happened in this thread
+ *  into the user supplied buffer.
+ *  @param buf  A chunk of memory with room for the error string.
+ *  @param len  Size of the buffer; if the message is longer than
+ *              len, then len-1 bytes of the message will be copied.
+ *  @return     0 on successful read of the last error, -1 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsGetLastError(char *buf, int len);
+
+
+/**
+ *  Cancels operations being made by the FileHandle.
+ *  Note: Cancel cannot be reversed.  This is intended
+ *  to be used before hdfsClose to avoid waiting for
+ *  operations to complete.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsCancel(hdfsFS fs, hdfsFile file);
+
+/**
+ * Create an HDFS builder, using the configuration XML files from the indicated
+ * directory.  If the directory does not exist, or contains no configuration
+ * XML files, a Builder using all default values will be returned.
+ *
+ * @return The HDFS builder, or NULL on error.
+ */
+struct hdfsBuilder *hdfsNewBuilderFromDirectory(const char * configDirectory);
+
+
+/**
+ * Get a configuration string from the settings currently read into the builder.
+ *
+ * @param key      The key to find
+ * @param val      (out param) The value.  This will be set to NULL if the
+ *                 key isn't found.  You must free this string with
+ *                 hdfsConfStrFree.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 Failure to find the key is not an error.
+ */
+LIBHDFS_EXTERNAL
+int hdfsBuilderConfGetStr(struct hdfsBuilder *bld, const char *key,
+                          char **val);
+
+/**
+ * Get a configuration integer from the settings currently read into the builder.
+ *
+ * @param key      The key to find
+ * @param val      (out param) The value.  This will NOT be changed if the
+ *                 key isn't found.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 Failure to find the key is not an error.
+ */
+LIBHDFS_EXTERNAL
+int hdfsBuilderConfGetInt(struct hdfsBuilder *bld, const char *key, int32_t *val);
+
+
+/**
+ * Get a configuration long from the settings currently read into the builder.
+ *
+ * @param key      The key to find
+ * @param val      (out param) The value.  This will NOT be changed if the
+ *                 key isn't found.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 Failure to find the key is not an error.
+ */
+LIBHDFS_EXTERNAL
+int hdfsBuilderConfGetLong(struct hdfsBuilder *bld, const char *key, int64_t *val);
+
+struct hdfsDNInfo {
+  const char *    ip_address;
+  const char *    hostname;
+  const char *    network_location;
+  int             xfer_port;
+  int             info_port;
+  int             IPC_port;
+  int             info_secure_port;
+};
+
+struct hdfsBlockInfo {
+    uint64_t            start_offset;
+    uint64_t            num_bytes;
+
+    size_t              num_locations;
+    struct hdfsDNInfo * locations;
+};
+
+struct hdfsBlockLocations
+{
+    uint64_t               fileLength;
+    int                    isLastBlockComplete;
+    int                    isUnderConstruction;
+
+    size_t                 num_blocks;
+    struct hdfsBlockInfo * blocks;
+};
+
+/**
+ * Returns the block information and data nodes associated with a particular file.
+ *
+ * The hdfsBlockLocations structure will have zero or more hdfsBlockInfo elements,
+ * which will have zero or more ip_addr elements indicating which datanodes have
+ * each block.
+ *
+ * @param fs         A connected hdfs instance
+ * @param path       Path of the file to query
+ * @param locations  The address of an output pointer to contain the block information.
+ *                   On success, this pointer must be later freed with hdfsFreeBlockLocations.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 If the file does not exist, -1 will be returned and errno will be set.
+ */
+LIBHDFS_EXTERNAL
+int hdfsGetBlockLocations(hdfsFS fs, const char *path, struct hdfsBlockLocations ** locations);
+
+/**
+ * Frees up an hdfsBlockLocations pointer allocated by hdfsGetBlockLocations.
+ *
+ * @param locations    The previously-populated pointer allocated by hdfsGetBlockLocations
+ * @return             0 on success, -1 on error
+ */
+LIBHDFS_EXTERNAL
+int hdfsFreeBlockLocations(struct hdfsBlockLocations * locations);
+
+
+
+
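A minimal sketch of how these two calls pair up, assuming a connected `hdfsFS` handle obtained elsewhere (for example via the standard libhdfs builder API); header paths and the file path are illustrative:

```cpp
// Sketch: walk the block layout of a file with the extended C API declared above.
// Error reporting is reduced to hdfsGetLastError for brevity.
#include <hdfs/hdfs.h>
#include <hdfspp/hdfs_ext.h>
#include <cstdio>

static void printBlockLayout(hdfsFS fs, const char *path) {
  struct hdfsBlockLocations *locations = nullptr;
  if (hdfsGetBlockLocations(fs, path, &locations) != 0) {
    char err[256];
    if (hdfsGetLastError(err, (int)sizeof(err)) == 0)
      std::fprintf(stderr, "hdfsGetBlockLocations failed: %s\n", err);
    return;
  }
  std::printf("file length: %llu, blocks: %zu\n",
              (unsigned long long)locations->fileLength, locations->num_blocks);
  for (size_t b = 0; b < locations->num_blocks; ++b) {
    const struct hdfsBlockInfo *blk = &locations->blocks[b];
    std::printf("  block @%llu (%llu bytes):\n",
                (unsigned long long)blk->start_offset,
                (unsigned long long)blk->num_bytes);
    for (size_t d = 0; d < blk->num_locations; ++d)
      std::printf("    datanode %s (%s)\n",
                  blk->locations[d].hostname, blk->locations[d].ip_address);
  }
  hdfsFreeBlockLocations(locations);  // release everything allocated by the query
}
```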
+/**
+ *  Client can supply a C style function pointer to be invoked any time something
+ *  is logged.  Unlike the C++ logger, this will not filter by level or component;
+ *  it is up to the consumer to throw away messages they don't want.
+ *
+ *  Note: The callback provided must be reentrant; the library does not guarantee
+ *  that there won't be concurrent calls.
+ *  Note: The callback does not own the LogData struct.  If the client would like to
+ *  keep one around, use hdfsCopyLogData/hdfsFreeLogData.
+ **/
+LIBHDFS_EXTERNAL
+void hdfsSetLogFunction(void (*hook)(LogData*));
+
+/**
+ *  Create a copy of the LogData object passed in and return a pointer to it.
+ *  Returns NULL if it was unable to copy.
+ **/
+LIBHDFS_EXTERNAL
+LogData *hdfsCopyLogData(const LogData*);
+
+/**
+ *  Client must call this to dispose of the LogData created by hdfsCopyLogData.
+ **/
+LIBHDFS_EXTERNAL
+void hdfsFreeLogData(LogData*);
+
+/**
+ * Enable logging functionality for a component.
+ * Return -1 on failure, 0 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsEnableLoggingForComponent(int component);
+
+/**
+ * Disable logging functionality for a component.
+ * Return -1 on failure, 0 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsDisableLoggingForComponent(int component);
+
+/**
+ * Set the logging level; valid levels range from trace to error.
+ * Return -1 on failure, 0 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsSetLoggingLevel(int component);
+
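A sketch of wiring these logging hooks together, assuming the `LogData` struct and the `HDFSPP_LOG_*` constants from `hdfspp/log.h` (added later in this changeset); header paths are assumptions:

```cpp
// Sketch: route libhdfs++ log messages into the application's own logging,
// then narrow what the library emits by component and level.
#include <hdfspp/hdfs_ext.h>
#include <hdfspp/log.h>
#include <cstdio>

static void myLogHook(LogData *data) {
  // Must be reentrant: keep it simple and avoid shared mutable state.
  std::fprintf(stderr, "[hdfs level=%d component=%d] %s (%s:%d)\n",
               data->level, data->component, data->msg,
               data->file_name, data->file_line);
}

static void configureLogging() {
  hdfsSetLogFunction(myLogHook);
  hdfsEnableLoggingForComponent(HDFSPP_LOG_COMPONENT_RPC);
  hdfsEnableLoggingForComponent(HDFSPP_LOG_COMPONENT_FILESYSTEM);
  hdfsSetLoggingLevel(HDFSPP_LOG_LEVEL_WARN);   // passing a HDFSPP_LOG_LEVEL_* value
}
```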
+/*
+ * Supported event names.  These names will stay consistent in libhdfs callbacks.
+ *
+ * Other events not listed here may be seen, but they are not stable and
+ * should not be counted on.
+ */
+extern const char * FS_NN_CONNECT_EVENT;
+extern const char * FS_NN_READ_EVENT;
+extern const char * FS_NN_WRITE_EVENT;
+
+extern const char * FILE_DN_CONNECT_EVENT;
+extern const char * FILE_DN_READ_EVENT;
+extern const char * FILE_DN_WRITE_EVENT;
+
+
+#define LIBHDFSPP_EVENT_OK (0)
+#define DEBUG_SIMULATE_ERROR (-1)
+
+typedef int (*libhdfspp_fs_event_callback)(const char * event, const char * cluster,
+                                           int64_t value, int64_t cookie);
+typedef int (*libhdfspp_file_event_callback)(const char * event,
+                                             const char * cluster,
+                                             const char * file,
+                                             int64_t value, int64_t cookie);
+
+/**
+ * Registers a callback for the next filesystem connect operation the current
+ * thread executes.
+ *
+ *  @param handler A function pointer.  Taken as a void* and internally
+ *                 cast into the appropriate type.
+ *  @param cookie  An opaque value that will be passed into the handler; can
+ *                 be used to correlate the handler with some object in the
+ *                 consumer's space.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsPreAttachFSMonitor(libhdfspp_fs_event_callback handler, int64_t cookie);
+
+
+/**
+ * Registers a callback for the next file open operation the current thread
+ * executes.
+ *
+ *  @param fs      The filesystem
+ *  @param handler A function pointer.  Taken as a void* and internally
+ *                 cast into the appropriate type.
+ *  @param cookie  An opaque value that will be passed into the handler; can
+ *                 be used to correlate the handler with some object in the
+ *                 consumer's space.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsPreAttachFileMonitor(libhdfspp_file_event_callback handler, int64_t cookie);
+
+
+/**
+ * Finds file name on the file system. hdfsFreeFileInfo should be called to deallocate memory.
+ *
+ *  @param fs         The filesystem (required)
+ *  @param path       Path at which to begin search, can have wild cards  (must be non-blank)
+ *  @param name       Name to find, can have wild cards                   (must be non-blank)
+ *  @param numEntries Set to the number of files/directories in the result.
+ *  @return           Returns a dynamically-allocated array of hdfsFileInfo
+ *                    objects; NULL on error or empty result.
+ *                    errno is set to non-zero on error or zero on success.
+ **/
+LIBHDFS_EXTERNAL
+hdfsFileInfo * hdfsFind(hdfsFS fs, const char* path, const char* name, uint32_t * numEntries);
+
+
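A sketch of a recursive wildcard search with hdfsFind, assuming a connected `fs`; `hdfsFileInfo` and `hdfsFreeFileInfo` come from the base libhdfs API, and the paths and patterns are illustrative:

```cpp
// Sketch: find every *.log file under directories matching /app?/logs*.
#include <hdfs/hdfs.h>
#include <hdfspp/hdfs_ext.h>
#include <cerrno>
#include <cstdio>

static void listLogs(hdfsFS fs) {
  uint32_t numEntries = 0;
  errno = 0;
  hdfsFileInfo *results = hdfsFind(fs, "/app?/logs*", "*.log", &numEntries);
  if (!results) {
    if (errno != 0) std::perror("hdfsFind");
    return;  // error, or simply an empty result
  }
  for (uint32_t i = 0; i < numEntries; ++i)
    std::printf("%s\n", results[i].mName);
  hdfsFreeFileInfo(results, (int)numEntries);   // deallocate as the doc comment requires
}
```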
+/*****************************************************************************
+ *                    HDFS SNAPSHOT FUNCTIONS
+ ****************************************************************************/
+
+/**
+ * Creates a snapshot of a snapshottable directory specified by path
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the directory to be snapshotted (must be non-blank)
+ *  @param name    Name to be given to the created snapshot (may be NULL)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsCreateSnapshot(hdfsFS fs, const char* path, const char* name);
+
+/**
+ * Deletes the directory snapshot specified by path and name
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the snapshotted directory (must be non-blank)
+ *  @param name    Name of the snapshot to be deleted (must be non-blank)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsDeleteSnapshot(hdfsFS fs, const char* path, const char* name);
+
+/**
+ * Renames the directory snapshot specified by path from old_name to new_name
+ *
+ *  @param fs         The filesystem (required)
+ *  @param path       Path to the snapshotted directory (must be non-blank)
+ *  @param old_name   Current name of the snapshot (must be non-blank)
+ *  @param new_name   New name of the snapshot (must be non-blank)
+ *  @return           0 on success, corresponding errno on failure
+ **/
+int hdfsRenameSnapshot(hdfsFS fs, const char* path, const char* old_name, const char* new_name);
+
+/**
+ * Allows snapshots to be made on the specified directory
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the directory to be made snapshottable (must be non-blank)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsAllowSnapshot(hdfsFS fs, const char* path);
+
+/**
+ * Disallows snapshots to be made on the specified directory
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the directory to be made non-snapshottable (must be non-blank)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsDisallowSnapshot(hdfsFS fs, const char* path);
+
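A sketch of a snapshot round-trip with the functions above, assuming a connected `fs`; the directory and snapshot names are illustrative. These calls return 0 on success and a corresponding errno value on failure:

```cpp
// Sketch: allow snapshots, take one before a risky operation, then clean up.
#include <hdfspp/hdfs_ext.h>
#include <cstdio>
#include <cstring>

static int snapshotExample(hdfsFS fs) {
  const char *dir = "/data/important";
  int rc = hdfsAllowSnapshot(fs, dir);
  if (rc != 0) { std::fprintf(stderr, "allow: %s\n", std::strerror(rc)); return rc; }

  rc = hdfsCreateSnapshot(fs, dir, "before-upgrade");
  if (rc != 0) { std::fprintf(stderr, "create: %s\n", std::strerror(rc)); return rc; }

  // ... perform the risky operation, then discard the snapshot ...
  rc = hdfsDeleteSnapshot(fs, dir, "before-upgrade");
  if (rc == 0) rc = hdfsDisallowSnapshot(fs, dir);
  return rc;
}
```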
+/**
+ * Create a FileSystem based on the builder but don't connect
+ * @param bld     Used to populate config options in the same manner as hdfsBuilderConnect.
+ *                Does not free builder.
+ **/
+LIBHDFS_EXTERNAL
+hdfsFS hdfsAllocateFileSystem(struct hdfsBuilder *bld);
+
+/**
+ * Connect a FileSystem created with hdfsAllocateFileSystem
+ * @param fs      A disconnected FS created with hdfsAllocateFileSystem
+ * @param bld     The same builder used for Allocate, or an exact copy of it; a few fields are still needed.
+ *                Does not free builder.
+ * @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsConnectAllocated(hdfsFS fs, struct hdfsBuilder *bld);
+
+/**
+ * Cancel a pending connection on a FileSystem
+ * @param fs      An FS that is in the process of connecting via hdfsConnectAllocated in another thread.
+ * @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsCancelPendingConnection(hdfsFS fs);
+
+
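A sketch of splitting allocation from connection so a hung connect can be timed out; the config directory, timeout, and the idea of running the connect on a worker via std::async are all assumptions of the example:

```cpp
// Sketch: allocate first, connect with a timeout, cancel if it takes too long.
#include <hdfspp/hdfs_ext.h>
#include <chrono>
#include <future>

static hdfsFS connectWithTimeout() {
  struct hdfsBuilder *bld = hdfsNewBuilderFromDirectory("/etc/hadoop/conf");
  if (!bld) return nullptr;

  hdfsFS fs = hdfsAllocateFileSystem(bld);   // no RPC traffic yet
  if (!fs) return nullptr;

  // Run the blocking connect on a worker thread so this thread can time it out.
  auto connect_result = std::async(std::launch::async,
                                   [fs, bld]() { return hdfsConnectAllocated(fs, bld); });

  if (connect_result.wait_for(std::chrono::seconds(30)) == std::future_status::timeout)
    hdfsCancelPendingConnection(fs);         // unblocks the worker thread

  int rc = connect_result.get();
  // Real code would also free the builder and disconnect/free fs on failure
  // (e.g. with the base libhdfs hdfsFreeBuilder/hdfsDisconnect calls).
  return rc == 0 ? fs : nullptr;
}
```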
+#ifdef __cplusplus
+} /* end extern "C" */
+#endif
+
+#endif

+ 492 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfspp.h

@@ -0,0 +1,492 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_HDFSPP_H_
+#define LIBHDFSPP_HDFSPP_H_
+
+#include "hdfspp/options.h"
+#include "hdfspp/status.h"
+#include "hdfspp/events.h"
+#include "hdfspp/block_location.h"
+#include "hdfspp/statinfo.h"
+#include "hdfspp/fsinfo.h"
+#include "hdfspp/content_summary.h"
+#include "hdfspp/uri.h"
+#include "hdfspp/config_parser.h"
+#include "hdfspp/locks.h"
+
+#include <functional>
+#include <memory>
+#include <set>
+#include <iostream>
+
+namespace hdfs {
+
+/**
+ * An IoService manages a queue of asynchronous tasks. All libhdfs++
+ * operations are filed against a particular IoService.
+ *
+ * When an operation is queued into an IoService, the IoService will
+ * run the callback handler associated with the operation. Note that
+ * the IoService must be stopped before destructing the objects that
+ * post the operations.
+ *
+ * From an implementation point of view the hdfs::IoService provides
+ * a thin wrapper over an asio::io_service object so that additional
+ * instrumentation and functionality can be added.
+ **/
+
+class IoService : public std::enable_shared_from_this<IoService>
+{
+ public:
+  static IoService *New();
+  static std::shared_ptr<IoService> MakeShared();
+  virtual ~IoService();
+
+  /**
+   * Start up as many threads as there are logical processors.
+   * Return number of threads created.
+   **/
+  virtual unsigned int InitDefaultWorkers() = 0;
+
+  /**
+   * Initialize with thread_count handler threads.
+   * If thread_count is less than one, print a log message and default to one thread.
+   * Return number of threads created.
+   **/
+  virtual unsigned int InitWorkers(unsigned int thread_count) = 0;
+
+  /**
+   * Place an item on the execution queue.  Will be invoked from outside of the calling context.
+   **/
+  virtual void PostTask(std::function<void(void)>& asyncTask) = 0;
+
+  /**
+   * Run the asynchronous tasks associated with this IoService.
+   **/
+  virtual void Run() = 0;
+  /**
+   * Stop running asynchronous tasks associated with this IoService.
+   * All worker threads will return as soon as they finish executing their current task.
+   **/
+  virtual void Stop() = 0;
+};
+
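A sketch of driving an IoService directly, assuming the shared-pointer overload of FileSystem::New declared further down; the user name is illustrative:

```cpp
// Sketch: start workers, post a task, and share the IoService with a FileSystem.
#include <hdfspp/hdfspp.h>
#include <functional>
#include <iostream>
#include <memory>

int main() {
  std::shared_ptr<hdfs::IoService> io_service = hdfs::IoService::MakeShared();
  unsigned int workers = io_service->InitWorkers(4);   // or InitDefaultWorkers()
  std::cout << "running " << workers << " worker threads\n";

  // PostTask takes a non-const reference, so the task must be an lvalue.
  std::function<void(void)> task = []() { std::cout << "hello from a worker\n"; };
  io_service->PostTask(task);

  // The shared-IoService overload of FileSystem::New expects workers to be running.
  hdfs::Options options;
  std::unique_ptr<hdfs::FileSystem> fs(
      hdfs::FileSystem::New(io_service, "hdfs_user", options));

  // Per the note above, stop the IoService before destroying objects that post to it.
  io_service->Stop();
  return 0;
}
```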
+/**
+ * A node exclusion rule provides a simple way of testing if the
+ * client should attempt to connect to a node based on the node's
+ * UUID.  The FileSystem and FileHandle use the BadDataNodeTracker
+ * by default.  AsyncPreadSome takes an optional NodeExclusionRule
+ * that will override the BadDataNodeTracker.
+ **/
+class NodeExclusionRule {
+ public:
+  virtual ~NodeExclusionRule();
+  virtual bool IsBadNode(const std::string &node_uuid) = 0;
+};
+
+/**
+ * Applications open a FileHandle to read files in HDFS.
+ **/
+class FileHandle {
+public:
+  /**
+   * Read data from a specific position. The current implementation
+   * stops at the block boundary.
+   *
+   * @param buf the pointer to the buffer
+   * @param buf_size the size of the buffer
+   * @param offset the offset in the file
+   *
+   * The handler returns the datanode that serves the block and the number of
+   * bytes read.  Status::InvalidOffset is returned when trying to begin
+   * a read past the EOF.
+   **/
+  virtual void
+  PositionRead(void *buf, size_t buf_size, uint64_t offset,
+               const std::function<void(const Status &, size_t)> &handler) = 0;
+  virtual Status PositionRead(void *buf, size_t buf_size, off_t offset, size_t *bytes_read) = 0;
+  virtual Status Read(void *buf, size_t buf_size, size_t *bytes_read) = 0;
+  virtual Status Seek(off_t *offset, std::ios_base::seekdir whence) = 0;
+
+  /**
+   * Cancel outstanding file operations.  This is not reversible; once called,
+   * the handle should be disposed of.
+   **/
+  virtual void CancelOperations(void) = 0;
+
+  /**
+   * Determine if a datanode should be excluded from future operations
+   * based on the return Status.
+   *
+   * @param status the Status object returned by InputStream::PositionRead
+   * @return true if the status indicates a failure that is not recoverable
+   * by the client and false otherwise.
+   **/
+  static bool ShouldExclude(const Status &status);
+
+
+  /**
+   * Sets an event callback for file-level event notifications (such as connecting
+   * to the DataNode, communications errors, etc.)
+   *
+   * Many events are defined in hdfspp/events.h; the consumer should also expect
+   * to be called with many private events, which can be ignored.
+   *
+   * @param callback The function to call when a reporting event occurs.
+   */
+  virtual void SetFileEventCallback(file_event_callback callback) = 0;
+
+  /* how many bytes have been successfully read */
+  virtual uint64_t get_bytes_read() = 0;
+
+  /* resets the number of bytes read to zero */
+  virtual void clear_bytes_read() = 0;
+
+  virtual ~FileHandle();
+};
+
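A sketch of the asynchronous PositionRead overload, assuming a FileHandle already obtained from FileSystem::Open (declared below); waiting on a std::promise is just one way to synchronize with the completion handler:

```cpp
// Sketch: issue one asynchronous positional read and wait for its handler.
#include <hdfspp/hdfspp.h>
#include <future>
#include <iostream>
#include <utility>
#include <vector>

static void readHeader(hdfs::FileHandle *file) {
  std::vector<char> buf(4096);
  std::promise<std::pair<hdfs::Status, size_t>> promise;
  auto result = promise.get_future();

  file->PositionRead(buf.data(), buf.size(), /*offset=*/0,
    [&promise](const hdfs::Status &status, size_t bytes_read) {
      promise.set_value(std::make_pair(status, bytes_read));
    });

  auto outcome = result.get();            // block until the handler fires
  if (outcome.first.ok())
    std::cout << "read " << outcome.second << " bytes, "
              << file->get_bytes_read() << " total on this handle\n";
  else
    std::cerr << outcome.first.ToString() << "\n";
}
```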
+/**
+ * FileSystem implements APIs to interact with HDFS.
+ **/
+class FileSystem {
+ public:
+  //Returns the default maximum depth for recursive Find tool
+  static uint32_t GetDefaultFindMaxDepth();
+
+  //Returns the default permission mask
+  static uint16_t GetDefaultPermissionMask();
+
+  //Checks if the given permission mask is valid
+  static Status CheckValidPermissionMask(uint16_t permissions);
+
+  //Checks if replication value is valid
+  static Status CheckValidReplication(uint16_t replication);
+
+  /**
+   * Create a new instance of the FileSystem object. The call
+   * initializes the RPC connections to the NameNode and returns an
+   * FileSystem object.
+   *
+   * Note: The FileSystem takes ownership of the IoService passed in the
+   * constructor.  The FileSystem destructor will call delete on it.
+   *
+   * If user_name is blank, the current user will be used for a default.
+   **/
+  static FileSystem *New(
+      IoService *&io_service, const std::string &user_name, const Options &options);
+
+  /**
+   * Works the same as the other FileSystem::New but takes a copy of an existing IoService.
+   * The shared IoService is expected to already have worker threads initialized.
+   **/
+  static FileSystem *New(
+      std::shared_ptr<IoService>, const std::string &user_name, const Options &options);
+
+  /**
+   * Returns a new instance with the default user and options, using the default IoService.
+   **/
+  static FileSystem *New();
+
+  /**
+   *  Callback type for async FileSystem::Connect calls.
+   *    Provides the result status and instance pointer to the handler.
+   **/
+  typedef std::function<void(const Status& result_status, FileSystem *created_fs)> AsyncConnectCallback;
+
+  /**
+   *  Connect directly to the specified namenode using the host and port (service).
+   **/
+  virtual void Connect(const std::string &server, const std::string &service,
+      const AsyncConnectCallback &handler) = 0;
+
+  /* Synchronous call of Connect */
+  virtual Status Connect(const std::string &server, const std::string &service) = 0;
+
+
+  /**
+   * Connects to the hdfs instance indicated by the defaultFs value of the
+   * Options structure.
+   *
+   * If no defaultFs is defined, returns an error.
+   */
+  virtual void ConnectToDefaultFs(
+      const AsyncConnectCallback& handler) = 0;
+  virtual Status ConnectToDefaultFs() = 0;
+
+  /**
+   * Cancels any attempts to connect to the HDFS cluster.
+   * FileSystem is expected to be destroyed after invoking this.
+   */
+  virtual bool CancelPendingConnect() = 0;
+
+  /**
+   * Open a file on HDFS. The call issues an RPC to the NameNode to
+   * gather the locations of all blocks in the file and to return a
+   * new instance of the @ref InputStream object.
+   **/
+  virtual void
+  Open(const std::string &path,
+       const std::function<void(const Status &, FileHandle *)> &handler) = 0;
+  virtual Status Open(const std::string &path, FileHandle **handle) = 0;
+
+  /**
+   * Get the block size for the given file.
+   * @param path The path to the file
+   */
+  virtual void GetPreferredBlockSize(const std::string &path,
+      const std::function<void(const Status &, const uint64_t &)> &handler) = 0;
+  virtual Status GetPreferredBlockSize(const std::string &path, uint64_t & block_size) = 0;
+
+  /**
+   * Set replication for an existing file.
+   * <p>
+   * The NameNode sets replication to the new value and returns.
+   * The actual block replication is not expected to be performed during
+   * this method call. The blocks will be populated or removed in the
+   * background as the result of the routine block maintenance procedures.
+   *
+   * @param path file name
+   * @param replication new replication
+   */
+  virtual void SetReplication(const std::string & path, int16_t replication, std::function<void(const Status &)> handler) = 0;
+  virtual Status SetReplication(const std::string & path, int16_t replication) = 0;
+
+  /**
+   * Sets the modification and access time of the file to the specified time.
+   * @param path The string representation of the path
+   * @param mtime The number of milliseconds since Jan 1, 1970.
+   *              Setting mtime to -1 means that modification time should not
+   *              be set by this call.
+   * @param atime The number of milliseconds since Jan 1, 1970.
+   *              Setting atime to -1 means that access time should not be set
+   *              by this call.
+   */
+  virtual void SetTimes(const std::string & path, uint64_t mtime, uint64_t atime, std::function<void(const Status &)> handler) = 0;
+  virtual Status SetTimes(const std::string & path, uint64_t mtime, uint64_t atime) = 0;
+
+  /**
+   * Returns metadata about the file if the file/directory exists.
+   **/
+  virtual void
+  GetFileInfo(const std::string &path,
+                  const std::function<void(const Status &, const StatInfo &)> &handler) = 0;
+  virtual Status GetFileInfo(const std::string &path, StatInfo & stat_info) = 0;
+
+  /**
+   * Returns the number of directories, files and bytes under the given path
+   **/
+  virtual void
+  GetContentSummary(const std::string &path,
+                  const std::function<void(const Status &, const ContentSummary &)> &handler) = 0;
+  virtual Status GetContentSummary(const std::string &path, ContentSummary & stat_info) = 0;
+
+  /**
+   * Retrieves the file system information as a whole, such as the total raw size of all files in the filesystem
+   * and the raw capacity of the filesystem
+   *
+   *  FsInfo struct is populated by GetFsStats
+   **/
+  virtual void GetFsStats(
+      const std::function<void(const Status &, const FsInfo &)> &handler) = 0;
+  virtual Status GetFsStats(FsInfo & fs_info) = 0;
+
+  /**
+   * Retrieves the files contained in a directory and returns the metadata
+   * for each of them.
+   *
+   * The asynchronous method will return batches of files; the consumer must
+   * return true if they want more files to be delivered.  The final bool
+   * parameter in the callback will be set to false if this is the final
+   * batch of files.
+   *
+   * The synchronous method will return all files in the directory.
+   *
+   * Path must be an absolute path in the hdfs filesystem (e.g. /tmp/foo/bar)
+   **/
+  virtual void
+  GetListing(const std::string &path,
+                  const std::function<bool(const Status &, const std::vector<StatInfo> &, bool)> &handler) = 0;
+  virtual Status GetListing(const std::string &path, std::vector<StatInfo> * stat_infos) = 0;
+
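A sketch of consuming the batched asynchronous GetListing, assuming a connected FileSystem; the promise-based wait is just one way to block until the final batch arrives:

```cpp
// Sketch: print one hdfs_ls style line per directory entry, batch by batch.
#include <hdfspp/hdfspp.h>
#include <future>
#include <iostream>
#include <string>
#include <vector>

static void listDirectory(hdfs::FileSystem *fs, const std::string &path) {
  std::promise<hdfs::Status> done;
  auto finished = done.get_future();

  fs->GetListing(path,
    [&done](const hdfs::Status &status, const std::vector<hdfs::StatInfo> &batch,
            bool has_more) -> bool {
      if (!status.ok()) { done.set_value(status); return false; }
      for (const hdfs::StatInfo &entry : batch)
        std::cout << entry.str() << "\n";
      if (!has_more) done.set_value(hdfs::Status::OK());
      return true;                         // keep the batches coming
    });

  hdfs::Status status = finished.get();
  if (!status.ok()) std::cerr << status.ToString() << "\n";
}
```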
+  /**
+   * Returns the locations of all known blocks for the indicated file (or part of it), or an error
+   * if the information could not be found
+   */
+  virtual void GetBlockLocations(const std::string & path, uint64_t offset, uint64_t length,
+    const std::function<void(const Status &, std::shared_ptr<FileBlockLocation> locations)> ) = 0;
+  virtual Status GetBlockLocations(const std::string & path, uint64_t offset, uint64_t length,
+    std::shared_ptr<FileBlockLocation> * locations) = 0;
+
+  /**
+   * Creates a new directory
+   *
+   *  @param path           Path to the directory to be created (must be non-empty)
+   *  @param permissions    Permissions for the new directory   (negative value for the default permissions)
+   *  @param createparent   Create parent directories if they do not exist
+   */
+  virtual void Mkdirs(const std::string & path, uint16_t permissions, bool createparent,
+      std::function<void(const Status &)> handler) = 0;
+  virtual Status Mkdirs(const std::string & path, uint16_t permissions, bool createparent) = 0;
+
+  /**
+   *  Delete the given file or directory from the file system.
+   *  <p>
+   *  Same as delete, but provides a way to avoid accidentally
+   *  deleting non-empty directories programmatically.
+   *  @param path existing name (must be non-empty)
+   *  @param recursive if true, deletes a non-empty directory recursively
+   */
+  virtual void Delete(const std::string &path, bool recursive,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status Delete(const std::string &path, bool recursive) = 0;
+
+  /**
+   *  Rename - Rename file.
+   *  @param oldPath The path of the source file.       (must be non-empty)
+   *  @param newPath The path of the destination file.  (must be non-empty)
+   */
+  virtual void Rename(const std::string &oldPath, const std::string &newPath,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status Rename(const std::string &oldPath, const std::string &newPath) = 0;
+
+  /**
+   * Set permissions for an existing file/directory.
+   *
+   * @param path          the path to the file or directory
+   * @param permissions   the bitmask to set it to (should be between 0 and 01777)
+   */
+  virtual void SetPermission(const std::string & path, uint16_t permissions,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status SetPermission(const std::string & path, uint16_t permissions) = 0;
+
+  /**
+   * Set Owner of a path (i.e. a file or a directory).
+   * The parameters username and groupname can be empty.
+   * @param path      file path
+   * @param username  If it is empty, the original username remains unchanged.
+   * @param groupname If it is empty, the original groupname remains unchanged.
+   */
+  virtual void SetOwner(const std::string & path, const std::string & username,
+      const std::string & groupname, const std::function<void(const Status &)> &handler) = 0;
+  virtual Status SetOwner(const std::string & path,
+      const std::string & username, const std::string & groupname) = 0;
+
+  /**
+   * Finds all files matching the specified name recursively starting from the
+   * specified directory. Returns metadata for each of them.
+   *
+   * Example: Find("/dir?/tree*", "some?file*name")
+   *
+   * @param path       Absolute path at which to begin search, can have wild cards (must be non-blank)
+   * @param name       Name to find, can also have wild cards                      (must be non-blank)
+   *
+   * The asynchronous method will return batches of files; the consumer must
+   * return true if they want more files to be delivered.  The final bool
+   * parameter in the callback will be set to false if this is the final
+   * batch of files.
+   *
+   * The synchronous method will return matching files.
+   **/
+  virtual void
+  Find(const std::string &path, const std::string &name, const uint32_t maxdepth,
+                  const std::function<bool(const Status &, const std::vector<StatInfo> & , bool)> &handler) = 0;
+  virtual Status Find(const std::string &path, const std::string &name,
+                  const uint32_t maxdepth, std::vector<StatInfo> * stat_infos) = 0;
+
+
+  /*****************************************************************************
+   *                    FILE SYSTEM SNAPSHOT FUNCTIONS
+   ****************************************************************************/
+
+  /**
+   * Creates a snapshot of a snapshottable directory specified by path
+   *
+   *  @param path    Path to the directory to be snapshotted (must be non-empty)
+   *  @param name    Name to be given to the created snapshot (may be empty)
+   **/
+  virtual void CreateSnapshot(const std::string &path, const std::string &name,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status CreateSnapshot(const std::string &path,
+      const std::string &name) = 0;
+
+  /**
+   * Deletes the directory snapshot specified by path and name
+   *
+   *  @param path    Path to the snapshotted directory (must be non-empty)
+   *  @param name    Name of the snapshot to be deleted (must be non-empty)
+   **/
+  virtual void DeleteSnapshot(const std::string &path, const std::string &name,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status DeleteSnapshot(const std::string &path,
+      const std::string &name) = 0;
+
+  /**
+   * Renames the directory snapshot specified by path from old_name to new_name
+   *
+   *  @param path       Path to the snapshotted directory (must be non-blank)
+   *  @param old_name   Current name of the snapshot (must be non-blank)
+   *  @param new_name   New name of the snapshot (must be non-blank)
+   **/
+  virtual void RenameSnapshot(const std::string &path, const std::string &old_name,
+      const std::string &new_name, const std::function<void(const Status &)> &handler) = 0;
+  virtual Status RenameSnapshot(const std::string &path, const std::string &old_name,
+      const std::string &new_name) = 0;
+
+  /**
+   * Allows snapshots to be made on the specified directory
+   *
+   *  @param path    Path to the directory to be made snapshottable (must be non-empty)
+   **/
+  virtual void AllowSnapshot(const std::string &path,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status AllowSnapshot(const std::string &path) = 0;
+
+  /**
+   * Disallows snapshots to be made on the specified directory
+   *
+   *  @param path    Path to the directory to be made non-snapshottable (must be non-empty)
+   **/
+  virtual void DisallowSnapshot(const std::string &path,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status DisallowSnapshot(const std::string &path) = 0;
+
+  /**
+   * Note that it is an error to destroy the filesystem from within a filesystem
+   * callback.  It will lead to a deadlock and the termination of the process.
+   */
+  virtual ~FileSystem();
+
+
+  /**
+   * Sets an event callback for fs-level event notifications (such as connecting
+   * to the NameNode, communications errors with the NN, etc.)
+   *
+   * Many events are defined in hdfspp/events.h; the consumer should also expect
+   * to be called with many private events, which can be ignored.
+   *
+   * @param callback The function to call when a reporting event occurs.
+   */
+  virtual void SetFsEventCallback(fs_event_callback callback) = 0;
+
+  virtual Options get_options() = 0;
+
+  virtual std::string get_cluster_name() = 0;
+};
+}
+
+#endif
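A sketch of the synchronous end-to-end path through the interface above: connect, open, and read a file in chunks. The namenode host, port, and file path are illustrative:

```cpp
// Sketch: connect to a namenode, stream a file to stdout, and clean up.
#include <hdfspp/hdfspp.h>
#include <iostream>
#include <vector>

int main() {
  hdfs::FileSystem *fs = hdfs::FileSystem::New();     // default user, options, IoService
  hdfs::Status status = fs->Connect("namenode.example.com", "8020");
  if (!status.ok()) { std::cerr << status.ToString() << "\n"; return 1; }

  hdfs::FileHandle *file = nullptr;
  status = fs->Open("/tmp/example.txt", &file);
  if (!status.ok()) { std::cerr << status.ToString() << "\n"; return 1; }

  std::vector<char> buf(64 * 1024);
  size_t bytes_read = 0;
  do {
    status = file->Read(buf.data(), buf.size(), &bytes_read);
    if (status.ok())
      std::cout.write(buf.data(), bytes_read);
  } while (status.ok() && bytes_read > 0);

  delete file;
  delete fs;   // never do this from inside a filesystem callback (see note above)
  return 0;
}
```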

+ 110 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/locks.h

@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_LOCKS_H_
+#define COMMON_HDFS_LOCKS_H_
+
+#include <stdexcept>
+#include <string>
+#include <atomic>
+#include <mutex>
+#include <memory>
+
+namespace hdfs
+{
+
+//
+//  Thrown by LockGuard to indicate that it was unable to acquire a mutex
+//  what_str should contain info about what caused the failure
+//
+class LockFailure : public std::runtime_error {
+ public:
+  LockFailure(const char *what_str) : std::runtime_error(what_str) {};
+  LockFailure(const std::string& what_str) : std::runtime_error(what_str) {};
+};
+
+//
+//  A pluggable mutex type to allow client code to share mutexes it may
+//  already use to protect certain system resources.  Certain shared
+//  libraries have some procedures that aren't always implemented in a thread
+//  safe manner. If libhdfs++ and the code linking it depend on the same
+//  library this provides a mechanism to coordinate safe access.
+//
+//  The interface provided is intended to be similar to std::mutex.  If the lock
+//  can't be acquired, lock may throw LockFailure.  If locking does fail,
+//  libhdfs++ is expected to fail as cleanly as possible, e.g.
+//  FileSystem::Mkdirs might return a MutexError but a subsequent call may be
+//  successful.
+//
+class Mutex {
+ public:
+  virtual ~Mutex() {};
+  virtual void lock() = 0;
+  virtual void unlock() = 0;
+  virtual std::string str() = 0;
+};
+
+//
+//  LockGuard works in a similar manner to std::lock_guard: it locks the mutex
+//  in the constructor and unlocks it in the destructor.
+//  Failure to acquire the mutex in the constructor will result in throwing a
+//  LockFailure exception.
+//
+class LockGuard {
+ public:
+  LockGuard(Mutex *m);
+  ~LockGuard();
+ private:
+  Mutex *_mtx;
+};
+
+//
+//  Manage instances of hdfs::Mutex that are intended to be global to the
+//  process.
+//
+//  LockManager's InitLocks method provides a mechanism for the calling
+//  application to share its own implementations of hdfs::Mutex.  It must be
+//  called prior to instantiating any FileSystem objects and can only be
+//  called once.  If a lock is not provided a default mutex type wrapping
+//  std::mutex is used as a default.
+//
+
+class LockManager {
+ public:
+  // Initializes with a default set of C++11 style mutexes
+  static bool InitLocks(Mutex *gssapi);
+  static Mutex *getGssapiMutex();
+
+  // Tests only, implementation may no-op on release builds.
+  // Reset _finalized to false and set all Mutex* members to default values.
+  static void TEST_reset_manager();
+  static Mutex *TEST_get_default_mutex();
+ private:
+  // Used only in tests.
+  static Mutex *TEST_default_mutex;
+  // Used to synchronize calls into GSSAPI/Kerberos libs
+  static Mutex *gssapiMtx;
+
+  // Prevent InitLocks from being called more than once
+  // Allows all locks to be set a single time atomically
+  static std::mutex _state_lock;
+  static bool _finalized;
+};
+
+} // end namespace hdfs
+#endif
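A sketch of sharing an application-owned GSSAPI mutex with libhdfs++ through LockManager; the wrapper class and the global std::mutex it adapts are assumptions of the example:

```cpp
// Sketch: adapt an existing std::mutex so libhdfs++ and the application
// serialize their GSSAPI/Kerberos calls on the same lock.
#include <hdfspp/locks.h>
#include <mutex>
#include <string>

class AppGssapiMutex : public hdfs::Mutex {
 public:
  explicit AppGssapiMutex(std::mutex &m) : mtx_(m) {}
  void lock() override   { mtx_.lock(); }
  void unlock() override { mtx_.unlock(); }
  std::string str() override { return "application gssapi mutex"; }
 private:
  std::mutex &mtx_;
};

std::mutex g_app_gssapi_mutex;                      // already guards the app's GSSAPI use
AppGssapiMutex g_shared_wrapper(g_app_gssapi_mutex);

void shareLocksWithLibhdfspp() {
  // Must run before any FileSystem is instantiated, and only once.
  bool ok = hdfs::LockManager::InitLocks(&g_shared_wrapper);
  (void)ok;  // false means InitLocks had already been called

  // The application can keep guarding its own GSSAPI calls with the shared mutex:
  hdfs::LockGuard guard(hdfs::LockManager::getGssapiMutex());
}
```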

+ 60 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/log.h

@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_HDFS_LOG
+#define LIBHDFSPP_HDFS_LOG
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ *  Things that are part of the public API but are specific to logging live here.
+ *  Added to avoid including the whole public API into the implementation of the logger.
+ **/
+
+/* logging levels, compatible with enum in lib/common/logging.cc */
+#define HDFSPP_LOG_LEVEL_TRACE 0
+#define HDFSPP_LOG_LEVEL_DEBUG 1
+#define HDFSPP_LOG_LEVEL_INFO  2
+#define HDFSPP_LOG_LEVEL_WARN  3
+#define HDFSPP_LOG_LEVEL_ERROR 4
+
+/* components emitting messages, compatible with enum lib/common/logging.cc */
+#define HDFSPP_LOG_COMPONENT_UNKNOWN      1 << 0
+#define HDFSPP_LOG_COMPONENT_RPC          1 << 1
+#define HDFSPP_LOG_COMPONENT_BLOCKREADER  1 << 2
+#define HDFSPP_LOG_COMPONENT_FILEHANDLE   1 << 3
+#define HDFSPP_LOG_COMPONENT_FILESYSTEM   1 << 4
+
+/**
+ *  POD struct for C to consume (C++ interface gets to take advantage of RAII)
+ **/
+typedef struct {
+  const char *msg;
+  int level;
+  int component;
+  const char *file_name;
+  int file_line;
+} LogData;
+
+#ifdef __cplusplus
+} // end extern C
+#endif
+
+#endif
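Since the C callback (hdfsSetLogFunction in hdfs_ext.h) delivers every message unfiltered, a hook can do its own filtering with the constants above; a minimal sketch, with the chosen level and components as assumptions:

```cpp
// Sketch: a log hook that keeps only WARN-and-above messages from the RPC and
// FileSystem components, using the bitmask component values defined above.
#include <hdfspp/log.h>
#include <cstdio>

static void filteringLogHook(LogData *d) {
  if (d->level < HDFSPP_LOG_LEVEL_WARN)
    return;
  if (!(d->component & (HDFSPP_LOG_COMPONENT_RPC | HDFSPP_LOG_COMPONENT_FILESYSTEM)))
    return;
  std::fprintf(stderr, "%s:%d %s\n", d->file_name, d->file_line, d->msg);
}
```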

+ 136 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/options.h

@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_OPTIONS_H_
+#define LIBHDFSPP_OPTIONS_H_
+
+#include "hdfspp/uri.h"
+
+#include <string>
+#include <vector>
+#include <map>
+
+namespace hdfs {
+
+
+struct NamenodeInfo {
+  NamenodeInfo(const std::string &nameservice_, const std::string &nodename_, const URI &uri_) :
+                nameservice(nameservice_), name(nodename_), uri(uri_) {}
+  NamenodeInfo(){}
+  //nameservice this belongs to
+  std::string nameservice;
+  //node name
+  std::string name;
+  //host:port
+  URI uri;
+
+  //get server hostname and port (aka service)
+  std::string get_host() const;
+  std::string get_port() const;
+};
+
+/**
+ * Options to control the behavior of the libhdfspp library.
+ **/
+struct Options {
+  /**
+   * Timeout for RPC requests in milliseconds.
+   * Default: 30000
+   **/
+  int rpc_timeout;
+  static const int kDefaultRpcTimeout = 30000;
+
+  /**
+   * Time to wait for an RPC connection before failing
+   * Default: 30000
+   **/
+  int rpc_connect_timeout;
+  static const int kDefaultRpcConnectTimeout = 30000;
+
+  /**
+   * Maximum number of retries for RPC operations
+   **/
+  int max_rpc_retries;
+  static const int kNoRetry = 0;
+  static const int kDefaultMaxRpcRetries = kNoRetry;
+
+  /**
+   * Number of ms to wait between retry of RPC operations
+   **/
+  int rpc_retry_delay_ms;
+  static const int kDefaultRpcRetryDelayMs = 10000;
+
+  /**
+   * Exclusion time for failed datanodes in milliseconds.
+   * Default: 600000
+   **/
+  unsigned int host_exclusion_duration;
+  static const unsigned int kDefaultHostExclusionDuration = 600000;
+
+  /**
+   * URI to connect to if no host:port are specified in connect
+   */
+  URI defaultFS;
+
+  /**
+   * Namenodes used to provide HA for this cluster if applicable
+   **/
+  std::map<std::string, std::vector<NamenodeInfo>> services;
+
+
+  /**
+   * Client failover attempts before failover gives up
+   **/
+  int failover_max_retries;
+  static const unsigned int kDefaultFailoverMaxRetries = 4;
+
+  /**
+   * Client failover attempts before failover gives up if server
+   * connection is timing out.
+   **/
+  int failover_connection_max_retries;
+  static const unsigned int kDefaultFailoverConnectionMaxRetries = 0;
+
+  /*
+   * Which form of authentication to use with the server
+   * Default: simple
+   */
+  enum Authentication {
+      kSimple,
+      kKerberos
+  };
+  Authentication authentication;
+  static const Authentication kDefaultAuthentication = kSimple;
+
+  /**
+   * Block size in bytes.
+   * Default: 128 * 1024 * 1024 = 134217728
+   **/
+  long block_size;
+  static const long kDefaultBlockSize = 128*1024*1024;
+
+  /**
+   * Asio worker thread count
+   * Default: -1, which means use the number of hardware threads
+   **/
+  int io_threads_;
+  static const int kDefaultIoThreads = -1;
+
+  Options();
+};
+}
+#endif
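A sketch of tuning Options before constructing a FileSystem; the specific values, user name, and namenode URI are illustrative:

```cpp
// Sketch: override a few of the kDefault* values above and build a FileSystem.
#include <hdfspp/hdfspp.h>
#include <hdfspp/options.h>

hdfs::FileSystem *makeTunedFileSystem() {
  hdfs::Options options;                       // starts from the kDefault* values
  options.rpc_timeout = 10000;                 // fail RPCs after 10 seconds
  options.max_rpc_retries = 2;
  options.rpc_retry_delay_ms = 2000;
  options.authentication = hdfs::Options::kKerberos;
  options.io_threads_ = 4;
  options.defaultFS = hdfs::URI::parse_from_string("hdfs://namenode.example.com:8020");

  auto io_service = hdfs::IoService::MakeShared();
  io_service->InitDefaultWorkers();            // the shared-IoService overload expects running workers
  return hdfs::FileSystem::New(io_service, "hdfs_user", options);
}
```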

+ 59 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/statinfo.h

@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HDFSPP_STATINFO_H_
+#define HDFSPP_STATINFO_H_
+
+#include <string>
+
+namespace hdfs {
+
+/**
+ * Information that is assumed to be unchanging about a file for the duration of
+ * the operations.
+ */
+struct StatInfo {
+  enum FileType {
+    IS_DIR = 1,
+    IS_FILE = 2,
+    IS_SYMLINK = 3
+  };
+
+  int          file_type;
+  std::string  path;
+  std::string  full_path;
+  uint64_t     length;
+  uint64_t     permissions;  //Octal number as in POSIX permissions; e.g. 0777
+  std::string  owner;
+  std::string  group;
+  uint64_t     modification_time;
+  uint64_t     access_time;
+  std::string  symlink;
+  uint32_t     block_replication;
+  uint64_t     blocksize;
+  uint64_t     fileid;
+  uint64_t     children_num;
+
+  StatInfo();
+
+  //Converts StatInfo object to std::string (hdfs_ls format)
+  std::string str() const;
+};
+
+}
+
+#endif

+ 111 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/status.h

@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_STATUS_H_
+#define LIBHDFSPP_STATUS_H_
+
+#include <string>
+#include <system_error>
+
+namespace hdfs {
+
+class Status {
+ public:
+  // Create a success status.
+  Status() : code_(0) {}
+
+  // Note: Avoid calling the Status constructors directly, call the factory methods instead
+
+  // Used for common status types
+  Status(int code, const char *msg);
+  // Used for server side exceptions reported through RpcResponseProto and similar
+  Status(int code, const char *exception_class, const char *exception_details);
+
+  // Factory methods
+  static Status OK();
+  static Status InvalidArgument(const char *msg);
+  static Status ResourceUnavailable(const char *msg);
+  static Status Unimplemented();
+  static Status Exception(const char *exception_class_name, const char *exception_details);
+  static Status Error(const char *error_message);
+  static Status AuthenticationFailed();
+  static Status AuthenticationFailed(const char *msg);
+  static Status AuthorizationFailed();
+  static Status AuthorizationFailed(const char *msg);
+  static Status Canceled();
+  static Status PathNotFound(const char *msg);
+  static Status InvalidOffset(const char *msg);
+  static Status PathIsNotDirectory(const char *msg);
+  static Status MutexError(const char *msg);
+
+  // success
+  bool ok() const { return code_ == 0; }
+
+  bool is_invalid_offset() const { return code_ == kInvalidOffset; }
+
+  // contains ENOENT error
+  bool pathNotFound() const { return code_ == kPathNotFound; }
+
+  // Returns the string "OK" for success.
+  std::string ToString() const;
+
+  // get error code
+  int code() const { return code_; }
+
+  // true if retrying cannot possibly recover from this error
+  bool notWorthRetry() const;
+
+  enum Code {
+    kOk = 0,
+    kInvalidArgument = static_cast<unsigned>(std::errc::invalid_argument),
+    kResourceUnavailable = static_cast<unsigned>(std::errc::resource_unavailable_try_again),
+    kUnimplemented = static_cast<unsigned>(std::errc::function_not_supported),
+    kOperationCanceled = static_cast<unsigned>(std::errc::operation_canceled),
+    kPermissionDenied = static_cast<unsigned>(std::errc::permission_denied),
+    kPathNotFound = static_cast<unsigned>(std::errc::no_such_file_or_directory),
+    kNotADirectory = static_cast<unsigned>(std::errc::not_a_directory),
+    kFileAlreadyExists = static_cast<unsigned>(std::errc::file_exists),
+    kPathIsNotEmptyDirectory = static_cast<unsigned>(std::errc::directory_not_empty),
+    kBusy = static_cast<unsigned>(std::errc::device_or_resource_busy),
+
+    // non-errc codes start at 256
+    kException = 256,
+    kAuthenticationFailed = 257,
+    kAccessControlException = 258,
+    kStandbyException = 259,
+    kSnapshotProtocolException = 260,
+    kInvalidOffset = 261,
+  };
+
+  std::string get_exception_class_str() const {
+    return exception_class_;
+  }
+
+  int get_server_exception_type() const {
+    return code_;
+  }
+
+ private:
+  int code_;
+  std::string msg_;
+
+  std::string exception_class_;
+};
+
+}
+
+#endif
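A sketch of branching on a Status result; GetFileInfo from hdfspp.h is used only as an example producer of Status values:

```cpp
// Sketch: distinguish success, missing paths, and permanent vs. transient failures.
#include <hdfspp/hdfspp.h>
#include <iostream>
#include <string>

bool statOrComplain(hdfs::FileSystem *fs, const std::string &path) {
  hdfs::StatInfo info;
  hdfs::Status status = fs->GetFileInfo(path, info);

  if (status.ok())
    return true;
  if (status.pathNotFound()) {                 // ENOENT-style errors
    std::cerr << path << " does not exist\n";
    return false;
  }
  if (status.notWorthRetry()) {                // retrying will not help
    std::cerr << "permanent failure: " << status.ToString() << "\n";
    return false;
  }
  std::cerr << "transient failure (" << status.code() << "): "
            << status.ToString() << "\n";
  return false;
}
```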

+ 137 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h

@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_URI_H_
+#define COMMON_HDFS_URI_H_
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+namespace hdfs
+{
+
+class uri_parse_error : public std::invalid_argument {
+ public:
+  uri_parse_error(const char *what_str) : std::invalid_argument(what_str) {}
+  uri_parse_error(const std::string& what_str) : std::invalid_argument(what_str) {}
+};
+
+class URI {
+public:
+  // Parse a string into a URI.  Throws an hdfs::uri_parse_error if the URI is malformed.
+  static URI parse_from_string(const std::string &str);
+
+  // URI encode/decode strings
+  static std::string encode  (const std::string &input);
+  static std::string decode  (const std::string &input);
+
+  URI();
+
+  std::string get_scheme(bool encoded_output=false) const;
+
+  void set_scheme(const std::string &s, bool encoded_input=false);
+
+  // empty if none.
+  std::string get_host(bool encoded_output=false) const;
+
+  void set_host(const std::string& h, bool encoded_input=false);
+
+  // true if port has been set
+  bool has_port() const;
+
+  // undefined if port hasn't been set
+  uint16_t get_port() const;
+
+  // use default if port hasn't been set
+  uint16_t get_port_or_default(uint16_t default_val) const;
+
+  void set_port(uint16_t p);
+
+  void clear_port();
+
+  std::string get_path(bool encoded_output=false) const;
+
+  void set_path(const std::string &p, bool encoded_input=false);
+
+  void add_path(const std::string &p, bool encoded_input=false);
+
+  std::vector<std::string> get_path_elements(bool encoded_output=false) const;
+
+  struct Query {
+    Query(const std::string& key, const std::string& val);
+    std::string key;
+    std::string value;
+  };
+
+  std::string get_query(bool encoded_output=false) const;
+
+  std::vector<Query> get_query_elements(bool encoded_output=false) const;
+
+  // Note that set_query must always be passed encoded strings
+  void set_query(const std::string &q);
+
+  // Adds a parameter onto the query; does not check if it already exists
+  //   e.g. parseFromString("foo?bar=baz").addQuery("bing","bang")
+  //   would leave "bar=baz&bing=bang" as the query
+  void add_query(const std::string &name, const std::string & value, bool encoded_input=false);
+
+  // Removes the query part if exists
+  //   e.g. parseFromString("foo?bar=baz&bing=bang&bar=bong").removeQueries("bar")
+  //   would leave bing=bang as the query
+  void remove_query(const std::string &q_name, bool encoded_input=false);
+
+  std::string get_fragment(bool encoded_output=false) const;
+
+  void set_fragment(const std::string &f, bool encoded_input=false);
+
+  std::string str(bool encoded_output=true) const;
+
+  // Get a string with each URI field printed on a separate line
+  std::string GetDebugString() const;
+private:
+  // These are stored in encoded form
+  std::string scheme;
+  std::string user;
+  std::string pass;
+  std::string host;
+  std::vector<std::string> path;
+  std::vector<Query> queries;
+  std::string fragment;
+  // implicitly narrowed to uint16_t if positive
+  // -1 to indicate uninitialized
+  int32_t _port;
+
+  // URI encoding helpers
+  static std::string from_encoded(bool encoded_output, const std::string & input);
+  static std::string to_encoded(bool encoded_input, const std::string & input);
+
+  bool has_authority() const;
+  std::string build_authority(bool encoded_output) const;
+
+  std::string build_path(bool encoded_output) const;
+  void parse_path(bool input_encoded, const std::string &input_path);
+};
+
+inline std::ostream& operator<<(std::ostream &out, const URI &uri) {
+  return out << uri.str();
+}
+
+}
+#endif
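A sketch of round-tripping a URI through the parser above; the URI string and query parameter are illustrative:

```cpp
// Sketch: parse, inspect, extend, and re-serialize a URI.
#include <hdfspp/uri.h>
#include <iostream>

int main() {
  try {
    hdfs::URI uri =
        hdfs::URI::parse_from_string("hdfs://namenode.example.com:8020/user/alice/data");
    std::cout << "scheme: " << uri.get_scheme() << "\n"
              << "host:   " << uri.get_host() << "\n"
              << "port:   " << uri.get_port_or_default(8020) << "\n"
              << "path:   " << uri.get_path() << "\n";

    uri.add_query("op", "LISTSTATUS");   // appended without checking for duplicates
    std::cout << uri << "\n";            // operator<< prints uri.str()
  } catch (const hdfs::uri_parse_error &e) {
    std::cerr << "malformed URI: " << e.what() << "\n";
    return 1;
  }
  return 0;
}
```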

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/CMakeLists.txt

@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(common)
+add_subdirectory(fs)
+add_subdirectory(reader)
+add_subdirectory(rpc)
+add_subdirectory(proto)
+add_subdirectory(connection)
+add_subdirectory(bindings)

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/CMakeLists.txt

@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(c)

+ 21 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/CMakeLists.txt

@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+add_library(bindings_c_obj OBJECT hdfs.cc)
+add_dependencies(bindings_c_obj fs rpc reader proto common fs rpc reader proto common)
+add_library(bindings_c $<TARGET_OBJECTS:bindings_c_obj>)

+ 2007 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc

@@ -0,0 +1,2007 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/hdfspp.h"
+
+#include "fs/filesystem.h"
+#include "common/hdfs_configuration.h"
+#include "common/configuration_loader.h"
+#include "common/logging.h"
+
+#include <hdfs/hdfs.h>
+#include <hdfspp/hdfs_ext.h>
+
+#include <libgen.h>
+#include "limits.h"
+
+#include <string>
+#include <cstring>
+#include <iostream>
+#include <algorithm>
+#include <functional>
+
+using namespace hdfs;
+using std::experimental::nullopt;
+using namespace std::placeholders;
+
+static constexpr tPort kDefaultPort = 8020;
+
+/** Annotate which parts of the code below are implementations of API functions
+ *  and whether they belong to the normal or the extended API.
+ */
+#define LIBHDFS_C_API
+#define LIBHDFSPP_EXT_API
+
+/* Separate the handles used by the C api from the C++ API*/
+struct hdfs_internal {
+  hdfs_internal(FileSystem *p) : filesystem_(p), working_directory_("/") {}
+  hdfs_internal(std::unique_ptr<FileSystem> p)
+      : filesystem_(std::move(p)), working_directory_("/") {}
+  virtual ~hdfs_internal(){};
+  FileSystem *get_impl() { return filesystem_.get(); }
+  const FileSystem *get_impl() const { return filesystem_.get(); }
+  std::string get_working_directory() {
+    std::lock_guard<std::mutex> read_guard(wd_lock_);
+    return working_directory_;
+  }
+  void set_working_directory(std::string new_directory) {
+    std::lock_guard<std::mutex> write_guard(wd_lock_);
+    working_directory_ = new_directory;
+  }
+
+ private:
+  std::unique_ptr<FileSystem> filesystem_;
+  std::string working_directory_;      //has to always start and end with '/'
+  std::mutex wd_lock_;                 //synchronize access to the working directory
+};
+
+struct hdfsFile_internal {
+  hdfsFile_internal(FileHandle *p) : file_(p) {}
+  hdfsFile_internal(std::unique_ptr<FileHandle> p) : file_(std::move(p)) {}
+  virtual ~hdfsFile_internal(){};
+  FileHandle *get_impl() { return file_.get(); }
+  const FileHandle *get_impl() const { return file_.get(); }
+
+ private:
+  std::unique_ptr<FileHandle> file_;
+};
+
+/* Keep thread local copy of last error string */
+thread_local std::string errstr;
+
+/* Fetch last error that happened in this thread */
+LIBHDFSPP_EXT_API
+int hdfsGetLastError(char *buf, int len) {
+  //No error message
+  if(errstr.empty()){
+    return -1;
+  }
+
+  //There is an error, but no room for the error message to be copied to
+  if(nullptr == buf || len < 1) {
+    return -1;
+  }
+
+  /* leave space for a trailing null */
+  size_t copylen = std::min((size_t)errstr.size(), (size_t)len);
+  if(copylen == (size_t)len) {
+    copylen--;
+  }
+
+  strncpy(buf, errstr.c_str(), copylen);
+
+  /* stick in null */
+  buf[copylen] = 0;
+
+  return 0;
+}
+
+/* Event callbacks for next open calls */
+thread_local std::experimental::optional<fs_event_callback> fsEventCallback;
+thread_local std::experimental::optional<file_event_callback> fileEventCallback;
+
+struct hdfsBuilder {
+  hdfsBuilder();
+  hdfsBuilder(const char * directory);
+  virtual ~hdfsBuilder() {}
+  ConfigurationLoader loader;
+  HdfsConfiguration config;
+
+  optional<std::string> overrideHost;
+  optional<tPort>       overridePort;
+  optional<std::string> user;
+
+  static constexpr tPort kUseDefaultPort = 0;
+};
+
+/* Error handling with optional debug to stderr */
+static void ReportError(int errnum, const std::string & msg) {
+  errno = errnum;
+  errstr = msg;
+#ifdef LIBHDFSPP_C_API_ENABLE_DEBUG
+  std::cerr << "Error: errno=" << strerror(errnum) << " message=\"" << msg
+            << "\"" << std::endl;
+#else
+  (void)msg;
+#endif
+}
+
+/* Convert Status wrapped error into appropriate errno and return code */
+static int Error(const Status &stat) {
+  const char * default_message;
+  int errnum;
+
+  int code = stat.code();
+  switch (code) {
+    case Status::Code::kOk:
+      return 0;
+    case Status::Code::kInvalidArgument:
+      errnum = EINVAL;
+      default_message = "Invalid argument";
+      break;
+    case Status::Code::kResourceUnavailable:
+      errnum = EAGAIN;
+      default_message = "Resource temporarily unavailable";
+      break;
+    case Status::Code::kUnimplemented:
+      errnum = ENOSYS;
+      default_message = "Function not implemented";
+      break;
+    case Status::Code::kException:
+      errnum = EINTR;
+      default_message = "Exception raised";
+      break;
+    case Status::Code::kOperationCanceled:
+      errnum = EINTR;
+      default_message = "Operation canceled";
+      break;
+    case Status::Code::kPermissionDenied:
+      errnum = EACCES;
+      default_message = "Permission denied";
+      break;
+    case Status::Code::kPathNotFound:
+      errnum = ENOENT;
+      default_message = "No such file or directory";
+      break;
+    case Status::Code::kNotADirectory:
+      errnum = ENOTDIR;
+      default_message = "Not a directory";
+      break;
+    case Status::Code::kFileAlreadyExists:
+      errnum = EEXIST;
+      default_message = "File already exists";
+      break;
+    case Status::Code::kPathIsNotEmptyDirectory:
+      errnum = ENOTEMPTY;
+      default_message = "Directory is not empty";
+      break;
+    case Status::Code::kInvalidOffset:
+      errnum = Status::Code::kInvalidOffset;
+      default_message = "Trying to begin a read past the EOF";
+      break;
+    default:
+      errnum = ENOSYS;
+      default_message = "Error: unrecognised code";
+  }
+  if (stat.ToString().empty())
+    ReportError(errnum, default_message);
+  else
+    ReportError(errnum, stat.ToString());
+  return -1;
+}
+
+static int ReportException(const std::exception & e)
+{
+  return Error(Status::Exception("Uncaught exception", e.what()));
+}
+
+static int ReportCaughtNonException()
+{
+  return Error(Status::Exception("Uncaught value not derived from std::exception", ""));
+}
+
+/* return false on failure */
+bool CheckSystem(hdfsFS fs) {
+  if (!fs) {
+    ReportError(ENODEV, "Cannot perform FS operations with null FS handle.");
+    return false;
+  }
+
+  return true;
+}
+
+/* return false on failure */
+bool CheckHandle(hdfsFile file) {
+  if (!file) {
+    ReportError(EBADF, "Cannot perform FS operations with null File handle.");
+    return false;
+  }
+  return true;
+}
+
+/* return false on failure */
+bool CheckSystemAndHandle(hdfsFS fs, hdfsFile file) {
+  if (!CheckSystem(fs))
+    return false;
+
+  if (!CheckHandle(file))
+    return false;
+
+  return true;
+}
+
+optional<std::string> getAbsolutePath(hdfsFS fs, const char* path) {
+  //Does not support . (dot) and .. (double dot) semantics
+  if (!path || path[0] == '\0') {
+    Error(Status::InvalidArgument("getAbsolutePath: argument 'path' cannot be NULL or empty"));
+    return optional<std::string>();
+  }
+  if (path[0] != '/') {
+    //we know that working directory always ends with '/'
+    return fs->get_working_directory().append(path);
+  }
+  return optional<std::string>(path);
+}
+
+/**
+ * C API implementations
+ **/
+
+LIBHDFS_C_API
+int hdfsFileIsOpenForRead(hdfsFile file) {
+  /* files can only be open for reads at the moment, do a quick check */
+  if (!CheckHandle(file)){
+    return 0;
+  }
+  return 1; // Update implementation when we get file writing
+}
+
+LIBHDFS_C_API
+int hdfsFileIsOpenForWrite(hdfsFile file) {
+  /* files can only be opened for reads at the moment; writing is not supported yet */
+  CheckHandle(file);
+  return -1; // Update implementation when we get file writing
+}
+
+int hdfsConfGetLong(const char *key, int64_t *val)
+{
+  try
+  {
+    errno = 0;
+    hdfsBuilder builder;
+    return hdfsBuilderConfGetLong(&builder, key, val);
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+hdfsFS doHdfsConnect(optional<std::string> nn, optional<tPort> port, optional<std::string> user, const Options & options) {
+  try
+  {
+    errno = 0;
+    IoService * io_service = IoService::New();
+
+    FileSystem *fs = FileSystem::New(io_service, user.value_or(""), options);
+    if (!fs) {
+      ReportError(ENODEV, "Could not create FileSystem object");
+      return nullptr;
+    }
+
+    if (fsEventCallback) {
+      fs->SetFsEventCallback(fsEventCallback.value());
+    }
+
+    Status status;
+    if (nn || port) {
+      if (!port) {
+        port = kDefaultPort;
+      }
+      std::string port_as_string = std::to_string(*port);
+      status = fs->Connect(nn.value_or(""), port_as_string);
+    } else {
+      status = fs->ConnectToDefaultFs();
+    }
+
+    if (!status.ok()) {
+      Error(status);
+
+      // FileSystem's ctor might take ownership of the io_service; if it does,
+      //    it will null out the pointer
+      if (io_service)
+        delete io_service;
+
+      delete fs;
+
+      return nullptr;
+    }
+    return new hdfs_internal(fs);
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFSPP_EXT_API
+hdfsFS hdfsAllocateFileSystem(struct hdfsBuilder *bld) {
+  // Same idea as the first half of doHdfsConnect, but return the wrapped FS before
+  // connecting.
+  try {
+    errno = 0;
+    std::shared_ptr<IoService> io_service = IoService::MakeShared();
+
+    int io_thread_count = bld->config.GetOptions().io_threads_;
+    if(io_thread_count < 1) {
+      io_service->InitDefaultWorkers();
+    } else {
+      io_service->InitWorkers(io_thread_count);
+    }
+
+    FileSystem *fs = FileSystem::New(io_service, bld->user.value_or(""), bld->config.GetOptions());
+    if (!fs) {
+      ReportError(ENODEV, "Could not create FileSystem object");
+      return nullptr;
+    }
+
+    if (fsEventCallback) {
+      fs->SetFsEventCallback(fsEventCallback.value());
+    }
+
+    return new hdfs_internal(fs);
+  } catch (const std::exception &e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+  return nullptr;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsConnectAllocated(hdfsFS fs, struct hdfsBuilder *bld) {
+  if(!CheckSystem(fs)) {
+    return ENODEV;
+  }
+
+  if(!bld) {
+    ReportError(ENODEV, "No hdfsBuilder object supplied");
+    return ENODEV;
+  }
+
+  // Get C++ FS to do connect
+  FileSystem *fsImpl = fs->get_impl();
+  if(!fsImpl) {
+    ReportError(ENODEV, "Null FileSystem implementation");
+    return ENODEV;
+  }
+
+  // Unpack the required bits of the hdfsBuilder
+  optional<std::string> nn = bld->overrideHost;
+  optional<tPort> port = bld->overridePort;
+  optional<std::string> user = bld->user;
+
+  // try-catch in case some of the third-party stuff throws
+  try {
+    Status status;
+    if (nn || port) {
+      if (!port) {
+        port = kDefaultPort;
+      }
+      std::string port_as_string = std::to_string(*port);
+      status = fsImpl->Connect(nn.value_or(""), port_as_string);
+    } else {
+      status = fsImpl->ConnectToDefaultFs();
+    }
+
+    if (!status.ok()) {
+      Error(status);
+      return ENODEV;
+    }
+
+    // 0 to indicate a good connection
+    return 0;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return ENODEV;
+  } catch (...) {
+    ReportCaughtNonException();
+    return ENODEV;
+  }
+
+  return 0;
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnect(const char *nn, tPort port) {
+  return hdfsConnectAsUser(nn, port, "");
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user) {
+  return doHdfsConnect(std::string(nn), port, std::string(user), Options());
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnectAsUserNewInstance(const char* nn, tPort port, const char *user ) {
+  //libhdfspp always returns a new instance
+  return doHdfsConnect(std::string(nn), port, std::string(user), Options());
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnectNewInstance(const char* nn, tPort port) {
+  //libhdfspp always returns a new instance
+  return hdfsConnectAsUser(nn, port, "");
+}
+
+LIBHDFSPP_EXT_API
+int hdfsCancelPendingConnection(hdfsFS fs) {
+  // todo: stick an enum in hdfs_internal to check the connect state
+  if(!CheckSystem(fs)) {
+    return ENODEV;
+  }
+
+  FileSystem *fsImpl = fs->get_impl();
+  if(!fsImpl) {
+    ReportError(ENODEV, "Null FileSystem implementation");
+    return ENODEV;
+  }
+
+  bool canceled = fsImpl->CancelPendingConnect();
+  if(canceled) {
+    return 0;
+  } else {
+    return EINTR;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsDisconnect(hdfsFS fs) {
+  try
+  {
+    errno = 0;
+    if (!fs) {
+      ReportError(ENODEV, "Cannot disconnect null FS handle.");
+      return -1;
+    }
+
+    delete fs;
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+hdfsFile hdfsOpenFile(hdfsFS fs, const char *path, int flags, int bufferSize,
+                      short replication, tSize blocksize) {
+  try
+  {
+    errno = 0;
+    (void)flags;
+    (void)bufferSize;
+    (void)replication;
+    (void)blocksize;
+    if (!fs) {
+      ReportError(ENODEV, "Cannot perform FS operations with null FS handle.");
+      return nullptr;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return nullptr;
+    }
+    FileHandle *f = nullptr;
+    Status stat = fs->get_impl()->Open(*abs_path, &f);
+    if (!stat.ok()) {
+      Error(stat);
+      return nullptr;
+    }
+    if (f && fileEventCallback) {
+      f->SetFileEventCallback(fileEventCallback.value());
+    }
+    return new hdfsFile_internal(f);
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsCloseFile(hdfsFS fs, hdfsFile file) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+    delete file;
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return nullptr;
+    }
+    std::string wd = fs->get_working_directory();
+    size_t size = wd.size();
+    if (size + 1 > bufferSize) {
+      std::stringstream ss;
+      ss << "hdfsGetWorkingDirectory: bufferSize is " << bufferSize <<
+          ", which is not enough to fit working directory of size " << (size + 1);
+      Error(Status::InvalidArgument(ss.str().c_str()));
+      return nullptr;
+    }
+    wd.copy(buffer, size);
+    buffer[size] = '\0';
+    return buffer;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsSetWorkingDirectory(hdfsFS fs, const char* path) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    //Enforce last character to be '/'
+    std::string withSlash = *abs_path;
+    char last = withSlash.back();
+    if (last != '/'){
+      withSlash += '/';
+    }
+    fs->set_working_directory(withSlash);
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsAvailable(hdfsFS fs, hdfsFile file) {
+  //Read-ahead is not implemented yet, so just return 0 if fs and file are valid
+  errno = 0;
+  if (!CheckSystemAndHandle(fs, file)) {
+    return -1;
+  }
+  return 0;
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetDefaultBlockSize(hdfsFS fs) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    return fs->get_impl()->get_options().block_size;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    uint64_t block_size;
+    Status stat = fs->get_impl()->GetPreferredBlockSize(*abs_path, block_size);
+    if (!stat.ok()) {
+      if (stat.pathNotFound()){
+        return fs->get_impl()->get_options().block_size;
+      } else {
+        return Error(stat);
+      }
+    }
+    return block_size;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication) {
+    try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      if(replication < 1){
+        return Error(Status::InvalidArgument("SetReplication: argument 'replication' cannot be less than 1"));
+      }
+      Status stat;
+      stat = fs->get_impl()->SetReplication(*abs_path, replication);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime) {
+    try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      Status stat;
+      stat = fs->get_impl()->SetTimes(*abs_path, mtime, atime);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetCapacity(hdfsFS fs) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+
+    hdfs::FsInfo fs_info;
+    Status stat = fs->get_impl()->GetFsStats(fs_info);
+    if (!stat.ok()) {
+      Error(stat);
+      return -1;
+    }
+    return fs_info.capacity;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetUsed(hdfsFS fs) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+
+    hdfs::FsInfo fs_info;
+    Status stat = fs->get_impl()->GetFsStats(fs_info);
+    if (!stat.ok()) {
+      Error(stat);
+      return -1;
+    }
+    return fs_info.used;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+void StatInfoToHdfsFileInfo(hdfsFileInfo * file_info,
+                            const hdfs::StatInfo & stat_info) {
+  /* file or directory */
+  if (stat_info.file_type == StatInfo::IS_DIR) {
+    file_info->mKind = kObjectKindDirectory;
+  } else if (stat_info.file_type == StatInfo::IS_FILE) {
+    file_info->mKind = kObjectKindFile;
+  } else {
+    file_info->mKind = kObjectKindFile;
+    LOG_WARN(kFileSystem, << "Symlink is not supported! Reporting as a file: " << stat_info.path);
+  }
+
+  /* the name of the file */
+  char copyOfPath[PATH_MAX];
+  strncpy(copyOfPath, stat_info.path.c_str(), PATH_MAX);
+  copyOfPath[PATH_MAX - 1] = '\0'; // in case strncpy ran out of space
+
+  char * mName = basename(copyOfPath);
+  size_t mName_size = strlen(mName);
+  file_info->mName = new char[mName_size+1];
+  strncpy(file_info->mName, basename(copyOfPath), mName_size + 1);
+
+  /* the last modification time for the file in seconds */
+  file_info->mLastMod = (tTime) stat_info.modification_time;
+
+  /* the size of the file in bytes */
+  file_info->mSize = (tOffset) stat_info.length;
+
+  /* the count of replicas */
+  file_info->mReplication = (short) stat_info.block_replication;
+
+  /* the block size for the file */
+  file_info->mBlockSize = (tOffset) stat_info.blocksize;
+
+  /* the owner of the file */
+  file_info->mOwner = new char[stat_info.owner.size() + 1];
+  strncpy(file_info->mOwner, stat_info.owner.c_str(), stat_info.owner.size() + 1);
+
+  /* the group associated with the file */
+  file_info->mGroup = new char[stat_info.group.size() + 1];
+  strncpy(file_info->mGroup, stat_info.group.c_str(), stat_info.group.size() + 1);
+
+  /* the permissions associated with the file encoded as an octal number (0777)*/
+  file_info->mPermissions = (short) stat_info.permissions;
+
+  /* the last access time for the file in seconds since the epoch*/
+  file_info->mLastAccess = stat_info.access_time;
+}
+
+LIBHDFS_C_API
+int hdfsExists(hdfsFS fs, const char *path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    hdfs::StatInfo stat_info;
+    Status stat = fs->get_impl()->GetFileInfo(*abs_path, stat_info);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+       return nullptr;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return nullptr;
+    }
+    hdfs::StatInfo stat_info;
+    Status stat = fs->get_impl()->GetFileInfo(*abs_path, stat_info);
+    if (!stat.ok()) {
+      Error(stat);
+      return nullptr;
+    }
+    hdfsFileInfo *file_info = new hdfsFileInfo[1];
+    StatInfoToHdfsFileInfo(file_info, stat_info);
+    return file_info;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path, int *numEntries) {
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        *numEntries = 0;
+        return nullptr;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        *numEntries = 0;
+        return nullptr;
+      }
+      std::vector<StatInfo> stat_infos;
+      Status stat = fs->get_impl()->GetListing(*abs_path, &stat_infos);
+      if (!stat.ok()) {
+        Error(stat);
+        *numEntries = 0;
+        return nullptr;
+      }
+      if(stat_infos.empty()){
+        *numEntries = 0;
+        return nullptr;
+      }
+      *numEntries = stat_infos.size();
+      hdfsFileInfo *file_infos = new hdfsFileInfo[stat_infos.size()];
+      for(std::vector<StatInfo>::size_type i = 0; i < stat_infos.size(); i++) {
+        StatInfoToHdfsFileInfo(&file_infos[i], stat_infos.at(i));
+      }
+
+      return file_infos;
+    } catch (const std::exception & e) {
+      ReportException(e);
+      *numEntries = 0;
+      return nullptr;
+    } catch (...) {
+      ReportCaughtNonException();
+      *numEntries = 0;
+      return nullptr;
+    }
+}
+
+LIBHDFS_C_API
+void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries)
+{
+    errno = 0;
+    int i;
+    for (i = 0; i < numEntries; ++i) {
+        delete[] hdfsFileInfo[i].mName;
+        delete[] hdfsFileInfo[i].mOwner;
+        delete[] hdfsFileInfo[i].mGroup;
+    }
+    delete[] hdfsFileInfo;
+}
+
+LIBHDFS_C_API
+int hdfsCreateDirectory(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    //Use default permissions and pass true to create all non-existent parent directories
+    stat = fs->get_impl()->Mkdirs(*abs_path, FileSystem::GetDefaultPermissionMask(), true);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsDelete(hdfsFS fs, const char* path, int recursive) {
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      Status stat;
+      stat = fs->get_impl()->Delete(*abs_path, recursive);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> old_abs_path = getAbsolutePath(fs, oldPath);
+    const optional<std::string> new_abs_path = getAbsolutePath(fs, newPath);
+    if(!old_abs_path || !new_abs_path) {
+      return -1;
+    }
+    Status stat;
+    stat = fs->get_impl()->Rename(*old_abs_path, *new_abs_path);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsChmod(hdfsFS fs, const char* path, short mode){
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      Status stat = FileSystem::CheckValidPermissionMask(mode);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      stat = fs->get_impl()->SetPermission(*abs_path, mode);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsChown(hdfsFS fs, const char* path, const char *owner, const char *group){
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      std::string own = (owner) ? owner : "";
+      std::string grp = (group) ? group : "";
+
+      Status stat;
+      stat = fs->get_impl()->SetOwner(*abs_path, own, grp);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFSPP_EXT_API
+hdfsFileInfo * hdfsFind(hdfsFS fs, const char* path, const char* name, uint32_t * numEntries){
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        *numEntries = 0;
+        return nullptr;
+      }
+
+      std::vector<StatInfo>  stat_infos;
+      Status stat = fs->get_impl()->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), &stat_infos);
+      if (!stat.ok()) {
+        Error(stat);
+        *numEntries = 0;
+        return nullptr;
+      }
+      //Existing API expects nullptr if size is 0
+      if(stat_infos.empty()){
+        *numEntries = 0;
+        return nullptr;
+      }
+      *numEntries = stat_infos.size();
+      hdfsFileInfo *file_infos = new hdfsFileInfo[stat_infos.size()];
+      for(std::vector<StatInfo>::size_type i = 0; i < stat_infos.size(); i++) {
+        StatInfoToHdfsFileInfo(&file_infos[i], stat_infos.at(i));
+      }
+
+      return file_infos;
+    } catch (const std::exception & e) {
+      ReportException(e);
+      *numEntries = 0;
+      return nullptr;
+    } catch (...) {
+      ReportCaughtNonException();
+      *numEntries = 0;
+      return nullptr;
+    }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsCreateSnapshot(hdfsFS fs, const char* path, const char* name) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    if(!name){
+      stat = fs->get_impl()->CreateSnapshot(*abs_path, "");
+    } else {
+      stat = fs->get_impl()->CreateSnapshot(*abs_path, name);
+    }
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsDeleteSnapshot(hdfsFS fs, const char* path, const char* name) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    if (!name) {
+      return Error(Status::InvalidArgument("hdfsDeleteSnapshot: argument 'name' cannot be NULL"));
+    }
+    Status stat;
+    stat = fs->get_impl()->DeleteSnapshot(*abs_path, name);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsRenameSnapshot(hdfsFS fs, const char* path, const char* old_name, const char* new_name) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    if (!old_name) {
+      return Error(Status::InvalidArgument("hdfsRenameSnapshot: argument 'old_name' cannot be NULL"));
+    }
+    if (!new_name) {
+      return Error(Status::InvalidArgument("hdfsRenameSnapshot: argument 'new_name' cannot be NULL"));
+    }
+    Status stat;
+    stat = fs->get_impl()->RenameSnapshot(*abs_path, old_name, new_name);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+
+}
+
+LIBHDFSPP_EXT_API
+int hdfsAllowSnapshot(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    stat = fs->get_impl()->AllowSnapshot(*abs_path);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsDisallowSnapshot(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    stat = fs->get_impl()->DisallowSnapshot(*abs_path);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, void *buffer,
+                tSize length) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    size_t len = 0;
+    Status stat = file->get_impl()->PositionRead(buffer, length, position, &len);
+    if(!stat.ok()) {
+      return Error(stat);
+    }
+    return (tSize)len;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+tSize hdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    size_t len = 0;
+    Status stat = file->get_impl()->Read(buffer, length, &len);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    return (tSize)len;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsUnbufferFile(hdfsFile file) {
+  //Currently we are not doing any buffering
+  CheckHandle(file);
+  return -1;
+}
+
+LIBHDFS_C_API
+int hdfsFileGetReadStatistics(hdfsFile file, struct hdfsReadStatistics **stats) {
+  try
+    {
+      errno = 0;
+      if (!CheckHandle(file)) {
+        return -1;
+      }
+      *stats = new hdfsReadStatistics;
+      memset(*stats, 0, sizeof(hdfsReadStatistics));
+      (*stats)->totalBytesRead = file->get_impl()->get_bytes_read();
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsFileClearReadStatistics(hdfsFile file) {
+  try
+    {
+      errno = 0;
+      if (!CheckHandle(file)) {
+        return -1;
+      }
+      file->get_impl()->clear_bytes_read();
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int64_t hdfsReadStatisticsGetRemoteBytesRead(const struct hdfsReadStatistics *stats) {
+    return stats->totalBytesRead - stats->totalLocalBytesRead;
+}
+
+LIBHDFS_C_API
+void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats) {
+    errno = 0;
+    delete stats;
+}
+
+/* 0 on success, -1 on error*/
+LIBHDFS_C_API
+int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    off_t desired = desiredPos;
+    Status stat = file->get_impl()->Seek(&desired, std::ios_base::beg);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+tOffset hdfsTell(hdfsFS fs, hdfsFile file) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    off_t offset = 0;
+    Status stat = file->get_impl()->Seek(&offset, std::ios_base::cur);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    return (tOffset)offset;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+/* extended API */
+int hdfsCancel(hdfsFS fs, hdfsFile file) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+    static_cast<FileHandleImpl*>(file->get_impl())->CancelOperations();
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsGetBlockLocations(hdfsFS fs, const char *path, struct hdfsBlockLocations ** locations_out)
+{
+  try
+  {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    if (locations_out == nullptr) {
+      ReportError(EINVAL, "Null pointer passed to hdfsGetBlockLocations");
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    std::shared_ptr<FileBlockLocation> ppLocations;
+    Status stat = fs->get_impl()->GetBlockLocations(*abs_path, 0, std::numeric_limits<int64_t>::max(), &ppLocations);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    hdfsBlockLocations *locations = new struct hdfsBlockLocations();
+    (*locations_out) = locations;
+
+    bzero(locations, sizeof(*locations));
+    locations->fileLength = ppLocations->getFileLength();
+    locations->isLastBlockComplete = ppLocations->isLastBlockComplete();
+    locations->isUnderConstruction = ppLocations->isUnderConstruction();
+
+    const std::vector<BlockLocation> & ppBlockLocations = ppLocations->getBlockLocations();
+    locations->num_blocks = ppBlockLocations.size();
+    locations->blocks = new struct hdfsBlockInfo[locations->num_blocks];
+    for (size_t i=0; i < ppBlockLocations.size(); i++) {
+      auto ppBlockLocation = ppBlockLocations[i];
+      auto block = &locations->blocks[i];
+
+      block->num_bytes = ppBlockLocation.getLength();
+      block->start_offset = ppBlockLocation.getOffset();
+
+      const std::vector<DNInfo> & ppDNInfos = ppBlockLocation.getDataNodes();
+      block->num_locations = ppDNInfos.size();
+      block->locations = new hdfsDNInfo[block->num_locations];
+      for (size_t j=0; j < block->num_locations; j++) {
+        auto ppDNInfo = ppDNInfos[j];
+        auto dn_info = &block->locations[j];
+
+        dn_info->xfer_port = ppDNInfo.getXferPort();
+        dn_info->info_port = ppDNInfo.getInfoPort();
+        dn_info->IPC_port  = ppDNInfo.getIPCPort();
+        dn_info->info_secure_port = ppDNInfo.getInfoSecurePort();
+
+        char * buf;
+        buf = new char[ppDNInfo.getHostname().size() + 1];
+        strncpy(buf, ppDNInfo.getHostname().c_str(), ppDNInfo.getHostname().size() + 1);
+        dn_info->hostname = buf;
+
+        buf = new char[ppDNInfo.getIPAddr().size() + 1];
+        strncpy(buf, ppDNInfo.getIPAddr().c_str(), ppDNInfo.getIPAddr().size() + 1);
+        dn_info->ip_address = buf;
+
+        buf = new char[ppDNInfo.getNetworkLocation().size() + 1];
+        strncpy(buf, ppDNInfo.getNetworkLocation().c_str(), ppDNInfo.getNetworkLocation().size() + 1);
+        dn_info->network_location = buf;
+      }
+    }
+
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsFreeBlockLocations(struct hdfsBlockLocations * blockLocations) {
+  errno = 0;
+  if (blockLocations == nullptr)
+    return 0;
+
+  for (size_t i=0; i < blockLocations->num_blocks; i++) {
+    auto block = &blockLocations->blocks[i];
+    for (size_t j=0; j < block->num_locations; j++) {
+      auto location = &block->locations[j];
+      delete[] location->hostname;
+      delete[] location->ip_address;
+      delete[] location->network_location;
+    }
+  }
+  delete[] blockLocations->blocks;
+  delete blockLocations;
+
+  return 0;
+}
+
+LIBHDFS_C_API
+char*** hdfsGetHosts(hdfsFS fs, const char* path, tOffset start, tOffset length) {
+  try
+    {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return nullptr;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return nullptr;
+      }
+      std::shared_ptr<FileBlockLocation> ppLocations;
+      Status stat = fs->get_impl()->GetBlockLocations(*abs_path, start, length, &ppLocations);
+      if (!stat.ok()) {
+        Error(stat);
+        return nullptr;
+      }
+      const std::vector<BlockLocation> & ppBlockLocations = ppLocations->getBlockLocations();
+      char ***hosts = new char**[ppBlockLocations.size() + 1];
+      for (size_t i=0; i < ppBlockLocations.size(); i++) {
+        const std::vector<DNInfo> & ppDNInfos = ppBlockLocations[i].getDataNodes();
+        hosts[i] = new char*[ppDNInfos.size() + 1];
+        for (size_t j=0; j < ppDNInfos.size(); j++) {
+          auto ppDNInfo = ppDNInfos[j];
+          hosts[i][j] = new char[ppDNInfo.getHostname().size() + 1];
+          strncpy(hosts[i][j], ppDNInfo.getHostname().c_str(), ppDNInfo.getHostname().size() + 1);
+        }
+        hosts[i][ppDNInfos.size()] = nullptr;
+      }
+      hosts[ppBlockLocations.size()] = nullptr;
+      return hosts;
+    } catch (const std::exception & e) {
+      ReportException(e);
+      return nullptr;
+    } catch (...) {
+      ReportCaughtNonException();
+      return nullptr;
+    }
+}
+
+LIBHDFS_C_API
+void hdfsFreeHosts(char ***blockHosts) {
+  errno = 0;
+  if (blockHosts == nullptr)
+    return;
+
+  for (size_t i = 0; blockHosts[i]; i++) {
+    for (size_t j = 0; blockHosts[i][j]; j++) {
+      delete[] blockHosts[i][j];
+    }
+    delete[] blockHosts[i];
+  }
+  delete[] blockHosts;
+}
+
+/*******************************************************************
+ *                EVENT CALLBACKS
+ *******************************************************************/
+
+const char * FS_NN_CONNECT_EVENT = hdfs::FS_NN_CONNECT_EVENT;
+const char * FS_NN_READ_EVENT = hdfs::FS_NN_READ_EVENT;
+const char * FS_NN_WRITE_EVENT = hdfs::FS_NN_WRITE_EVENT;
+
+const char * FILE_DN_CONNECT_EVENT = hdfs::FILE_DN_CONNECT_EVENT;
+const char * FILE_DN_READ_EVENT = hdfs::FILE_DN_READ_EVENT;
+const char * FILE_DN_WRITE_EVENT = hdfs::FILE_DN_WRITE_EVENT;
+
+
+event_response fs_callback_glue(libhdfspp_fs_event_callback handler,
+                      int64_t cookie,
+                      const char * event,
+                      const char * cluster,
+                      int64_t value) {
+  int result = handler(event, cluster, value, cookie);
+  if (result == LIBHDFSPP_EVENT_OK) {
+    return event_response::make_ok();
+  }
+#ifndef LIBHDFSPP_SIMULATE_ERROR_DISABLED
+  if (result == DEBUG_SIMULATE_ERROR) {
+    return event_response::test_err(Status::Error("Simulated error"));
+  }
+#endif
+
+  return event_response::make_ok();
+}
+
+event_response file_callback_glue(libhdfspp_file_event_callback handler,
+                      int64_t cookie,
+                      const char * event,
+                      const char * cluster,
+                      const char * file,
+                      int64_t value) {
+  int result = handler(event, cluster, file, value, cookie);
+  if (result == LIBHDFSPP_EVENT_OK) {
+    return event_response::make_ok();
+  }
+#ifndef LIBHDFSPP_SIMULATE_ERROR_DISABLED
+  if (result == DEBUG_SIMULATE_ERROR) {
+    return event_response::test_err(Status::Error("Simulated error"));
+  }
+#endif
+
+  return event_response::make_ok();
+}
+
+LIBHDFSPP_EXT_API
+int hdfsPreAttachFSMonitor(libhdfspp_fs_event_callback handler, int64_t cookie)
+{
+  fs_event_callback callback = std::bind(fs_callback_glue, handler, cookie, _1, _2, _3);
+  fsEventCallback = callback;
+  return 0;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsPreAttachFileMonitor(libhdfspp_file_event_callback handler, int64_t cookie)
+{
+  file_event_callback callback = std::bind(file_callback_glue, handler, cookie, _1, _2, _3, _4);
+  fileEventCallback = callback;
+  return 0;
+}
+
+/*******************************************************************
+ *                BUILDER INTERFACE
+ *******************************************************************/
+
+HdfsConfiguration LoadDefault(ConfigurationLoader & loader)
+{
+  optional<HdfsConfiguration> result = loader.LoadDefaultResources<HdfsConfiguration>();
+  if (result)
+  {
+    return result.value();
+  }
+  else
+  {
+    return loader.NewConfig<HdfsConfiguration>();
+  }
+}
+
+hdfsBuilder::hdfsBuilder() : config(loader.NewConfig<HdfsConfiguration>())
+{
+  errno = 0;
+  config = LoadDefault(loader);
+}
+
+hdfsBuilder::hdfsBuilder(const char * directory) :
+      config(loader.NewConfig<HdfsConfiguration>())
+{
+  errno = 0;
+  loader.SetSearchPath(directory);
+  config = LoadDefault(loader);
+}
+
+LIBHDFS_C_API
+struct hdfsBuilder *hdfsNewBuilder(void)
+{
+  try
+  {
+    errno = 0;
+    return new struct hdfsBuilder();
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn)
+{
+  errno = 0;
+  bld->overrideHost = std::string(nn);
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port)
+{
+  errno = 0;
+  bld->overridePort = port;
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName)
+{
+  errno = 0;
+  if (userName && *userName) {
+    bld->user = std::string(userName);
+  }
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld) {
+  //libhdfspp always returns a new instance, so nothing to do
+  (void)bld;
+  errno = 0;
+}
+
+LIBHDFS_C_API
+void hdfsFreeBuilder(struct hdfsBuilder *bld)
+{
+  try
+  {
+    errno = 0;
+    delete bld;
+  } catch (const std::exception & e) {
+    ReportException(e);
+  } catch (...) {
+    ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key,
+                          const char *val)
+{
+  try
+  {
+    errno = 0;
+    optional<HdfsConfiguration> newConfig = bld->loader.OverlayValue(bld->config, key, val);
+    if (newConfig)
+    {
+      bld->config = newConfig.value();
+      return 0;
+    }
+    else
+    {
+      ReportError(EINVAL, "Could not change Builder value");
+      return -1;
+    }
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+void hdfsConfStrFree(char *val)
+{
+  errno = 0;
+  free(val);
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld) {
+  hdfsFS fs = doHdfsConnect(bld->overrideHost, bld->overridePort, bld->user, bld->config.GetOptions());
+  // Always free the builder
+  hdfsFreeBuilder(bld);
+  return fs;
+}
+
+LIBHDFS_C_API
+int hdfsConfGetStr(const char *key, char **val)
+{
+  try
+  {
+    errno = 0;
+    hdfsBuilder builder;
+    return hdfsBuilderConfGetStr(&builder, key, val);
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsConfGetInt(const char *key, int32_t *val)
+{
+  try
+  {
+    errno = 0;
+    hdfsBuilder builder;
+    return hdfsBuilderConfGetInt(&builder, key, val);
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+//
+//  Extended builder interface
+//
+struct hdfsBuilder *hdfsNewBuilderFromDirectory(const char * configDirectory)
+{
+  try
+  {
+    errno = 0;
+    return new struct hdfsBuilder(configDirectory);
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsBuilderConfGetStr(struct hdfsBuilder *bld, const char *key,
+                          char **val)
+{
+  try
+  {
+    errno = 0;
+    optional<std::string> value = bld->config.Get(key);
+    if (value)
+    {
+      size_t len = value->length() + 1;
+      *val = static_cast<char *>(malloc(len));
+      strncpy(*val, value->c_str(), len);
+    }
+    else
+    {
+      *val = nullptr;
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+// If we're running on a 32-bit platform, we might get 64-bit values that
+//    don't fit in an int, and int is what the original libhdfs hdfs.h interface specifies
+bool isValidInt(int64_t value)
+{
+  return (value >= std::numeric_limits<int>::min() &&
+          value <= std::numeric_limits<int>::max());
+}
+
+LIBHDFSPP_EXT_API
+int hdfsBuilderConfGetInt(struct hdfsBuilder *bld, const char *key, int32_t *val)
+{
+  try
+  {
+    errno = 0;
+    // Pull from default configuration
+    optional<int64_t> value = bld->config.GetInt(key);
+    if (value)
+    {
+      if (!isValidInt(*value)){
+        ReportError(EINVAL, "Builder value is not valid");
+        return -1;
+      }
+      *val = *value;
+      return 0;
+    }
+    // If not found, don't change val
+    ReportError(EINVAL, "Could not get Builder value");
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsBuilderConfGetLong(struct hdfsBuilder *bld, const char *key, int64_t *val)
+{
+  try
+  {
+    errno = 0;
+    // Pull from default configuration
+    optional<int64_t> value = bld->config.GetInt(key);
+    if (value)
+    {
+      *val = *value;
+      return 0;
+    }
+    // If not found, don't change val
+    ReportError(EINVAL, "Could not get Builder value");
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+/**
+ * Logging functions
+ **/
+class CForwardingLogger : public LoggerInterface {
+ public:
+  CForwardingLogger() : callback_(nullptr) {};
+
+  // Converts LogMessage into LogData, a POD type,
+  // and invokes callback_ if it's not null.
+  void Write(const LogMessage& msg);
+
+  // pass in NULL to clear the hook
+  void SetCallback(void (*callback)(LogData*));
+
+  //return a copy, or null on failure.
+  static LogData *CopyLogData(const LogData*);
+  //free LogData allocated with CopyLogData
+  static void FreeLogData(LogData*);
+ private:
+  void (*callback_)(LogData*);
+};
+
+/**
+ *  Plugin to forward messages to a C function pointer
+ **/
+void CForwardingLogger::Write(const LogMessage& msg) {
+  if(!callback_)
+    return;
+
+  const std::string text = msg.MsgString();
+
+  LogData data;
+  data.level = msg.level();
+  data.component = msg.component();
+  data.msg = text.c_str();
+  data.file_name = msg.file_name();
+  data.file_line = msg.file_line();
+  callback_(&data);
+}
+
+void CForwardingLogger::SetCallback(void (*callback)(LogData*)) {
+  callback_ = callback;
+}
+
+LogData *CForwardingLogger::CopyLogData(const LogData *orig) {
+  if(!orig)
+    return nullptr;
+
+  LogData *copy = (LogData*)malloc(sizeof(LogData));
+  if(!copy)
+    return nullptr;
+
+  copy->level = orig->level;
+  copy->component = orig->component;
+  if(orig->msg)
+    copy->msg = strdup(orig->msg);
+  copy->file_name = orig->file_name;
+  copy->file_line = orig->file_line;
+  return copy;
+}
+
+void CForwardingLogger::FreeLogData(LogData *data) {
+  if(!data)
+    return;
+  if(data->msg)
+    free((void*)data->msg);
+
+  // Inexpensive way to help catch use-after-free
+  memset(data, 0, sizeof(LogData));
+  free(data);
+}
+
+LIBHDFSPP_EXT_API
+LogData *hdfsCopyLogData(LogData *data) {
+  return CForwardingLogger::CopyLogData(data);
+}
+
+LIBHDFSPP_EXT_API
+void hdfsFreeLogData(LogData *data) {
+  CForwardingLogger::FreeLogData(data);
+}
+
+LIBHDFSPP_EXT_API
+void hdfsSetLogFunction(void (*callback)(LogData*)) {
+  CForwardingLogger *logger = new CForwardingLogger();
+  logger->SetCallback(callback);
+  LogManager::SetLoggerImplementation(std::unique_ptr<LoggerInterface>(logger));
+}
+
+static bool IsLevelValid(int level) {
+  if(level < HDFSPP_LOG_LEVEL_TRACE || level > HDFSPP_LOG_LEVEL_ERROR)
+    return false;
+  return true;
+}
+
+
+//  should use __builtin_popcount as an optimization on platforms that support it
+static int popcnt(int val) {
+  int bits = sizeof(val) * 8;
+  int count = 0;
+  for(int i=0; i<bits; i++) {
+    if((val >> i) & 0x1)
+      count++;
+  }
+  return count;
+}
+
+static bool IsComponentValid(int component) {
+  if(component < HDFSPP_LOG_COMPONENT_UNKNOWN || component > HDFSPP_LOG_COMPONENT_FILESYSTEM)
+    return false;
+  if(popcnt(component) != 1)
+    return false;
+  return true;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsEnableLoggingForComponent(int component) {
+  errno = 0;
+  if(!IsComponentValid(component))
+    return -1;
+  LogManager::EnableLogForComponent(static_cast<LogSourceComponent>(component));
+  return 0;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsDisableLoggingForComponent(int component) {
+  errno = 0;
+  if(!IsComponentValid(component))
+    return -1;
+  LogManager::DisableLogForComponent(static_cast<LogSourceComponent>(component));
+  return 0;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsSetLoggingLevel(int level) {
+  errno = 0;
+  if(!IsLevelValid(level))
+    return -1;
+  LogManager::SetLogLevel(static_cast<LogLevel>(level));
+  return 0;
+}
+
+#undef LIBHDFS_C_API
+#undef LIBHDFSPP_EXT_API
+
+
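For reference, a minimal usage sketch of the C API added above: install the logging hook, connect through the builder, read a file, and surface failures through hdfsGetLastError. This sketch is not part of the patch; the include path, namenode host/port, and file path are placeholder assumptions and may need adjusting for a real build.

#include "hdfspp/hdfs_ext.h"   // assumed install path of the extended C API header

#include <fcntl.h>
#include <stdio.h>

// Forward log messages to stderr; LogData comes from the extension header.
static void LogToStderr(LogData *d) {
  if (d && d->msg)
    fprintf(stderr, "libhdfs++: %s\n", d->msg);
}

int main(void) {
  hdfsSetLogFunction(LogToStderr);
  hdfsSetLoggingLevel(HDFSPP_LOG_LEVEL_ERROR);

  struct hdfsBuilder *bld = hdfsNewBuilder();
  hdfsBuilderSetNameNode(bld, "localhost");   // placeholder namenode host
  hdfsBuilderSetNameNodePort(bld, 8020);      // placeholder namenode port

  hdfsFS fs = hdfsBuilderConnect(bld);        // also frees the builder
  if (!fs) {
    char err[512];
    if (hdfsGetLastError(err, (int)sizeof(err)) == 0)
      fprintf(stderr, "connect failed: %s\n", err);
    return 1;
  }

  // flags, bufferSize, replication and blocksize are ignored by this implementation
  hdfsFile file = hdfsOpenFile(fs, "/tmp/example.txt", O_RDONLY, 0, 0, 0);
  if (file) {
    char buf[4096];
    tSize n = hdfsRead(fs, file, buf, (tSize)sizeof(buf));
    if (n >= 0)
      fwrite(buf, 1, (size_t)n, stdout);
    hdfsCloseFile(fs, file);
  }

  hdfsDisconnect(fs);
  return 0;
}

Note that hdfsBuilderConnect frees the builder whether or not the connection succeeds, so it must not be reused afterwards.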

+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt

@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(NEED_LINK_DL)
+   set(LIB_DL dl)
+endif()
+
+add_library(common_obj OBJECT status.cc sasl_digest_md5.cc hdfs_ioservice.cc options.cc configuration.cc configuration_loader.cc hdfs_configuration.cc uri.cc util.cc retry_policy.cc cancel_tracker.cc logging.cc libhdfs_events_impl.cc auth_info.cc namenode_info.cc statinfo.cc fsinfo.cc content_summary.cc locks.cc config_parser.cc)
+add_library(common $<TARGET_OBJECTS:common_obj> $<TARGET_OBJECTS:uriparser2_obj>)
+target_link_libraries(common ${LIB_DL})

+ 49 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/async_stream.h

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_COMMON_ASYNC_STREAM_H_
+#define LIB_COMMON_ASYNC_STREAM_H_
+
+#include <asio.hpp>
+
+namespace hdfs {
+
+typedef asio::mutable_buffers_1 MutableBuffers;
+typedef asio::const_buffers_1   ConstBuffers;
+
+/*
+ * asio-compatible stream implementation.
+ *
+ * Lifecycle: should be managed using std::shared_ptr so the object can be
+ *    handed from consumer to consumer
+ * Threading model: async_read_some and async_write_some are not thread-safe.
+ */
+class AsyncStream  {
+public:
+  virtual void async_read_some(const MutableBuffers &buf,
+          std::function<void (const asio::error_code & error,
+                                 std::size_t bytes_transferred) > handler) = 0;
+
+  virtual void async_write_some(const ConstBuffers &buf,
+            std::function<void (const asio::error_code & error,
+                                 std::size_t bytes_transferred) > handler) = 0;
+};
+
+}
+
+#endif
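As a rough illustration of how the AsyncStream interface above is meant to be satisfied, the sketch below forwards both calls to an asio TCP socket; the class name and the shared_ptr ownership model are illustrative assumptions, not part of the patch.

#include <functional>
#include <memory>
#include <asio.hpp>

#include "common/async_stream.h"

namespace hdfs {

// Illustrative only: forwards AsyncStream calls to an asio::ip::tcp::socket.
class TcpAsyncStream : public AsyncStream {
public:
  explicit TcpAsyncStream(std::shared_ptr<asio::ip::tcp::socket> sock)
      : sock_(std::move(sock)) {}

  void async_read_some(const MutableBuffers &buf,
      std::function<void(const asio::error_code &error,
                         std::size_t bytes_transferred)> handler) override {
    sock_->async_read_some(buf, handler);
  }

  void async_write_some(const ConstBuffers &buf,
      std::function<void(const asio::error_code &error,
                         std::size_t bytes_transferred)> handler) override {
    sock_->async_write_some(buf, handler);
  }

private:
  std::shared_ptr<asio::ip::tcp::socket> sock_;
};

} // namespace hdfs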

+ 18 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.cc

@@ -0,0 +1,18 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "auth_info.h"

+ 90 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.h

@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_FS_AUTHINFO_H
+#define LIB_FS_AUTHINFO_H
+
+#include "common/optional_wrapper.h"
+
+namespace hdfs {
+
+class Token {
+public:
+  std::string identifier;
+  std::string password;
+};
+
+class AuthInfo {
+public:
+    enum AuthMethod {
+        kSimple,
+        kKerberos,
+        kToken,
+        kUnknownAuth,
+        kAuthFailed
+    };
+
+    AuthInfo() :
+        method(kSimple) {
+    }
+
+    explicit AuthInfo(AuthMethod mech) :
+        method(mech) {
+    }
+
+    bool useSASL() {
+        return method != kSimple;
+    }
+
+    const std::string & getUser() const {
+        return user;
+    }
+
+    void setUser(const std::string & user) {
+        this->user = user;
+    }
+
+    AuthMethod getMethod() const {
+        return method;
+    }
+
+    void setMethod(AuthMethod method) {
+        this->method = method;
+    }
+
+    const std::experimental::optional<Token> & getToken() const {
+        return token;
+    }
+
+    void setToken(const Token & token) {
+        this->token = token;
+    }
+
+    void clearToken() {
+        this->token = std::experimental::nullopt;
+    }
+
+private:
+    AuthMethod method;
+    std::string user;
+    std::experimental::optional<Token> token;
+};
+
+}
+
+#endif /* LIB_FS_AUTHINFO_H */
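A short sketch of how a caller might populate this class for token-based authentication; the function name and the identifier/password bytes below are placeholders, not part of the patch.

#include <string>
#include "common/auth_info.h"

// Illustrative only: builds an AuthInfo carrying a delegation token.
hdfs::AuthInfo MakeTokenAuth(const std::string &user) {
  hdfs::AuthInfo info(hdfs::AuthInfo::kToken);
  info.setUser(user);

  hdfs::Token token;
  token.identifier = "opaque-token-identifier";   // placeholder bytes
  token.password   = "opaque-token-password";     // placeholder bytes
  info.setToken(token);

  // useSASL() now returns true, since the method is not kSimple.
  return info;
}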

+ 37 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.cc

@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "cancel_tracker.h"
+
+namespace hdfs {
+
+CancelTracker::CancelTracker() : canceled_(false) {}
+
+std::shared_ptr<CancelTracker> CancelTracker::New() {
+  return std::make_shared<CancelTracker>();
+}
+
+bool CancelTracker::is_canceled() {
+  return canceled_;
+}
+
+void CancelTracker::set_canceled() {
+  canceled_ = true;
+}
+
+}

+ 40 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.h

@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef COMMON_CANCELTRACKER_H
+#define COMMON_CANCELTRACKER_H
+
+#include <memory>
+#include <atomic>
+
+namespace hdfs {
+
+class CancelTracker : public std::enable_shared_from_this<CancelTracker> {
+ public:
+  CancelTracker();
+  static std::shared_ptr<CancelTracker> New();
+  void set_canceled();
+  bool is_canceled();
+ private:
+  std::atomic_bool canceled_;
+};
+
+typedef std::shared_ptr<CancelTracker> CancelHandle;
+
+}
+#endif
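
A minimal usage sketch (not part of the patch) of the CancelHandle alias defined above: one side hands out the shared tracker, the other polls it. The do_work function is invented purely for illustration.

    #include "common/cancel_tracker.h"

    #include <iostream>

    // Hypothetical long-running operation that periodically polls its CancelHandle.
    void do_work(hdfs::CancelHandle cancel) {
      for (int i = 0; i < 100; ++i) {
        if (cancel->is_canceled()) {
          std::cout << "work canceled at step " << i << "\n";
          return;
        }
        // ... one unit of work ...
      }
    }

    int main() {
      hdfs::CancelHandle handle = hdfs::CancelTracker::New();
      handle->set_canceled();   // typically called from another thread
      do_work(handle);
      return 0;
    }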

+ 219 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/config_parser.cc

@@ -0,0 +1,219 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/config_parser.h"
+#include "common/hdfs_configuration.h"
+#include "common/configuration_loader.h"
+
+#include <string>
+#include <memory>
+#include <vector>
+#include <numeric>
+
+namespace hdfs {
+
+static const char kSearchPathSeparator = ':';
+
+HdfsConfiguration LoadDefault(ConfigurationLoader & loader)
+{
+  optional<HdfsConfiguration> result = loader.LoadDefaultResources<HdfsConfiguration>();
+  if (result)
+  {
+    return result.value();
+  }
+  else
+  {
+    return loader.NewConfig<HdfsConfiguration>();
+  }
+}
+
+class ConfigParser::impl {
+ public:
+  impl() :
+      config_(loader_.NewConfig<HdfsConfiguration>()) {
+  }
+
+  impl(const std::vector<std::string>& dirs) :
+      config_(loader_.NewConfig<HdfsConfiguration>()) {
+
+      // Convert vector of paths into ':' separated path
+      std::string path = std::accumulate(dirs.begin(), dirs.end(), std::string(""),
+        [](std::string cumm, std::string elem) {return cumm + kSearchPathSeparator + elem;});
+      loader_.SetSearchPath(path);
+      config_ = LoadDefault(loader_);
+  }
+
+  impl(const std::string& path) :
+      config_(loader_.NewConfig<HdfsConfiguration>()) {
+
+      loader_.SetSearchPath(path);
+      config_ = LoadDefault(loader_);
+  }
+
+  bool LoadDefaultResources() {
+    config_ = LoadDefault(loader_);
+    return true;
+  }
+
+  std::vector<std::pair<std::string, Status> > ValidateResources() const {
+    return loader_.ValidateDefaultResources<HdfsConfiguration>();
+  }
+
+  bool get_int(const std::string& key, int& outval) const {
+    auto ret = config_.GetInt(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_string(const std::string& key, std::string& outval) const {
+    auto ret = config_.Get(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_bool(const std::string& key, bool& outval) const {
+    auto ret = config_.GetBool(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_double(const std::string& key, double& outval) const {
+    auto ret = config_.GetDouble(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_uri(const std::string& key, URI& outval) const {
+    auto ret = config_.GetUri(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_options(Options& outval) {
+    outval = config_.GetOptions();
+    return true;
+  }
+
+ private:
+  ConfigurationLoader loader_;
+  HdfsConfiguration config_;
+};
+
+
+ConfigParser::ConfigParser() {
+  pImpl.reset(new ConfigParser::impl());
+}
+
+ConfigParser::ConfigParser(const std::vector<std::string>& configDirectories) {
+  pImpl.reset(new ConfigParser::impl(configDirectories));
+}
+
+ConfigParser::ConfigParser(const std::string& path) {
+  pImpl.reset(new ConfigParser::impl(path));
+}
+
+ConfigParser::~ConfigParser() = default;
+ConfigParser::ConfigParser(ConfigParser&&) = default;
+ConfigParser& ConfigParser::operator=(ConfigParser&&) = default;
+
+bool ConfigParser::LoadDefaultResources() { return pImpl->LoadDefaultResources(); }
+std::vector<std::pair<std::string, Status> > ConfigParser::ValidateResources() const { return pImpl->ValidateResources();}
+
+bool ConfigParser::get_int(const std::string& key, int& outval) const { return pImpl->get_int(key, outval); }
+int ConfigParser::get_int_or(const std::string& key, const int defaultval) const {
+  int res = 0;
+  if(get_int(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_string(const std::string& key, std::string& outval) const { return pImpl->get_string(key, outval); }
+std::string ConfigParser::get_string_or(const std::string& key, const std::string& defaultval) const {
+  std::string res;
+  if(get_string(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_bool(const std::string& key, bool& outval) const { return pImpl->get_bool(key, outval); }
+bool ConfigParser::get_bool_or(const std::string& key, const bool defaultval) const {
+  bool res = false;
+  if(get_bool(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_double(const std::string& key, double& outval) const { return pImpl->get_double(key, outval); }
+double ConfigParser::get_double_or(const std::string& key, const double defaultval) const {
+  double res = 0;
+  if(get_double(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_uri(const std::string& key, URI& outval) const { return pImpl->get_uri(key, outval); }
+URI ConfigParser::get_uri_or(const std::string& key, const URI& defaultval) const {
+  URI res;
+  if(get_uri(key, res)) {
+    return res;
+  } else {
+    res = defaultval;
+    return res;
+  }
+}
+
+bool ConfigParser::get_options(Options& outval) const { return pImpl->get_options(outval); }
+Options ConfigParser::get_options_or(const Options& defaultval) const {
+  Options res;
+  if(get_options(res)) {
+    return res;
+  } else {
+    res = defaultval;
+    return res;
+  }
+}
+
+} // end namespace hdfs
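
A minimal sketch (not part of the patch) of how the ConfigParser API above might be called from client code; the directory path and configuration keys are illustrative only.

    #include "hdfspp/config_parser.h"

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Passing an explicit directory list replaces the default search path.
      std::vector<std::string> dirs = {"/etc/hadoop/conf"};
      hdfs::ConfigParser parser(dirs);

      // Each default resource (core-site.xml, hdfs-site.xml) reports whether it
      // was found and parsed cleanly.
      for (const auto &entry : parser.ValidateResources()) {
        std::cout << entry.first << (entry.second.ok() ? ": ok" : ": failed") << "\n";
      }

      // Typed accessors fall back to the supplied default when a key is absent.
      int handlers = parser.get_int_or("dfs.namenode.handler.count", 10);
      std::string fs = parser.get_string_or("fs.defaultFS", "hdfs://localhost:8020");
      std::cout << "handlers=" << handlers << " fs=" << fs << "\n";
      return 0;
    }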

+ 169 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.cc

@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The following features are not currently implemented
+ * - Deprecated values
+ * - Make filename and config file contents unicode-safe
+ * - Config redirection/environment substitution
+ *
+ * - getInts (comma separated)
+ * - getStrings (comma separated)
+ * - getIntegerRange
+ * - getSocketAddr
+ * - getTimeDuration
+ * - getBytes (e.g. 1M or 1G)
+ * - hex values
+ */
+
+#include "configuration.h"
+#include "hdfspp/uri.h"
+
+#include <strings.h>
+#include <sstream>
+#include <map>
+#include <rapidxml/rapidxml.hpp>
+#include <rapidxml/rapidxml_utils.hpp>
+
+namespace hdfs {
+
+/*
+ * Configuration class
+ */
+std::vector<std::string> Configuration::GetDefaultFilenames() {
+  auto result = std::vector<std::string>();
+  result.push_back("core-site.xml");
+  return result;
+}
+
+
+optional<std::string> Configuration::Get(const std::string& key) const {
+  std::string caseFixedKey = fixCase(key);
+  auto found = raw_values_.find(caseFixedKey);
+  if (found != raw_values_.end()) {
+    return std::experimental::make_optional(found->second.value);
+  } else {
+    return optional<std::string>();
+  }
+}
+
+std::string Configuration::GetWithDefault(
+    const std::string& key, const std::string& default_value) const {
+  return Get(key).value_or(default_value);
+}
+
+optional<int64_t> Configuration::GetInt(const std::string& key) const {
+  auto raw = Get(key);
+  if (raw) {
+    errno = 0;
+    char* end = nullptr;
+    optional<int64_t> result =
+        std::experimental::make_optional(static_cast<int64_t>(strtol(raw->c_str(), &end, 10)));
+    if (end == raw->c_str()) {
+      /* strtol will set end to input if no conversion was done */
+      return optional<int64_t>();
+    }
+    if (errno == ERANGE) {
+      return optional<int64_t>();
+    }
+
+    return result;
+  } else {
+    return optional<int64_t>();
+  }
+}
+
+int64_t Configuration::GetIntWithDefault(const std::string& key,
+                                         int64_t default_value) const {
+  return GetInt(key).value_or(default_value);
+}
+
+optional<double> Configuration::GetDouble(const std::string& key) const {
+  auto raw = Get(key);
+  if (raw) {
+    errno = 0;
+    char* end = nullptr;
+    auto result = std::experimental::make_optional(strtod(raw->c_str(), &end));
+    if (end == raw->c_str()) {
+      /* strtod will set end to input if no conversion was done */
+      return optional<double>();
+    }
+    if (errno == ERANGE) {
+      return optional<double>();
+    }
+
+    return result;
+  } else {
+    return optional<double>();
+  }
+}
+
+double Configuration::GetDoubleWithDefault(const std::string& key,
+                                           double default_value) const {
+  return GetDouble(key).value_or(default_value);
+}
+
+optional<bool> Configuration::GetBool(const std::string& key) const {
+  auto raw = Get(key);
+  if (!raw) {
+    return optional<bool>();
+  }
+
+  if (!strcasecmp(raw->c_str(), "true")) {
+    return std::experimental::make_optional(true);
+  }
+  if (!strcasecmp(raw->c_str(), "false")) {
+    return std::experimental::make_optional(false);
+  }
+
+  return optional<bool>();
+}
+
+bool Configuration::GetBoolWithDefault(const std::string& key,
+                                       bool default_value) const {
+  return GetBool(key).value_or(default_value);
+}
+
+optional<URI> Configuration::GetUri(const std::string& key) const {
+  optional<std::string> raw = Get(key);
+  if (raw) {
+    try {
+      return std::experimental::make_optional(URI::parse_from_string(*raw));
+    } catch (const uri_parse_error& e) {
+      // Return empty below
+    }
+  }
+  return optional<URI>();
+}
+
+URI Configuration::GetUriWithDefault(const std::string& key,
+                                     std::string default_value) const {
+  optional<URI> result = GetUri(key);
+  if (result) {
+    return *result;
+  } else {
+    try {
+      return URI::parse_from_string(default_value);
+    } catch (const uri_parse_error& e) {
+      return URI();
+    }
+  }
+}
+
+
+}

+ 108 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.h

@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_CONFIGURATION_H_
+#define COMMON_CONFIGURATION_H_
+
+#include "hdfspp/uri.h"
+
+#include <string>
+#include <map>
+#include <vector>
+#include <set>
+#include <istream>
+#include <stdint.h>
+#include "common/optional_wrapper.h"
+
+namespace hdfs {
+
+template <class T>
+using optional = std::experimental::optional<T>;
+
+/**
+ * Configuration class that parses XML.
+ *
+ * Files should be an XML file of the form
+ * <configuration>
+ *  <property>
+ *    <name>Name</name>
+ *    <value>Value</value>
+ *  </property>
+ * </configuration>
+ *
+ * Configuration objects should be created via the ConfigurationLoader class.
+ * Configuration objects are immutable and can be shared between threads.
+ *
+ * This class is thread-safe.
+ */
+class Configuration {
+ public:
+  // Gets values
+  std::string           GetWithDefault(const std::string &key,
+                                       const std::string &default_value) const;
+  optional<std::string> Get(const std::string &key) const;
+  int64_t               GetIntWithDefault(const std::string &key,
+                                          int64_t default_value) const;
+  optional<int64_t>     GetInt(const std::string &key) const;
+  double                GetDoubleWithDefault(const std::string &key,
+                                             double default_value) const;
+  optional<double>      GetDouble(const std::string &key) const;
+  bool                  GetBoolWithDefault(const std::string &key,
+                                           bool default_value) const;
+  optional<bool>        GetBool(const std::string &key) const;
+  URI                   GetUriWithDefault(const std::string &key,
+                                          std::string default_value) const;
+  optional<URI>         GetUri(const std::string &key) const;
+
+protected:
+  friend class ConfigurationLoader;
+
+  /* Transparent data holder for property values */
+  struct ConfigData {
+    std::string value;
+    bool final;
+    ConfigData() : final(false){}
+    ConfigData(const std::string &value_) : value(value_), final(false) {}
+    void operator=(const std::string &new_value) {
+      value = new_value;
+      final = false;
+    }
+  };
+  typedef std::map<std::string, ConfigData> ConfigMap;
+
+  Configuration() {}
+  Configuration(ConfigMap &src_map) : raw_values_(src_map){}
+  Configuration(const ConfigMap &src_map) : raw_values_(src_map){}
+
+  static std::vector<std::string> GetDefaultFilenames();
+
+  // While we want this to be const, it would preclude copying Configuration
+  //    objects.  The Configuration class must not allow any mutations of
+  //    the raw_values
+  ConfigMap raw_values_;
+
+  static std::string fixCase(const std::string &in) {
+    std::string result(in);
+    for (auto & c: result) c = static_cast<char>(toupper(c));
+    return result;
+  }
+};
+
+}
+
+#endif
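
A short sketch (not part of the patch) showing the XML shape documented above being parsed through ConfigurationLoader, which is the intended way to construct a Configuration; the property names and values are made up for illustration.

    #include "common/configuration.h"
    #include "common/configuration_loader.h"

    #include <iostream>
    #include <string>

    int main() {
      // XML in the shape described in the Configuration docblock.
      const std::string xml =
          "<configuration>"
          "  <property><name>fs.defaultFS</name><value>hdfs://nn1:8020</value></property>"
          "  <property><name>dfs.blocksize</name><value>134217728</value></property>"
          "</configuration>";

      hdfs::ConfigurationLoader loader;
      auto conf = loader.Load<hdfs::Configuration>(xml);
      if (!conf) {
        std::cerr << "could not parse configuration\n";
        return 1;
      }

      // Present keys come back as parsed; missing keys fall back to the default.
      std::cout << conf->GetWithDefault("fs.defaultFS", "") << "\n";
      std::cout << conf->GetIntWithDefault("dfs.blocksize", 0) << "\n";
      std::cout << conf->GetIntWithDefault("dfs.replication", 3) << "\n";
      return 0;
    }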

+ 328 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.cc

@@ -0,0 +1,328 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration_loader.h"
+#include "common/logging.h"
+
+#include <fstream>
+#include <strings.h>
+#include <sstream>
+#include <map>
+#include <sys/stat.h>
+#include <rapidxml/rapidxml.hpp>
+#include <rapidxml/rapidxml_utils.hpp>
+
+namespace hdfs {
+
+/*
+ * ConfigurationLoader class
+ */
+
+#if defined(WIN32) || defined(_WIN32)
+static const char kFileSeparator = '\\';
+#else
+static const char kFileSeparator = '/';
+#endif
+
+static const char kSearchPathSeparator = ':';
+
+bool is_valid_bool(const std::string& raw) {
+  if (raw.empty()) {
+    return false;
+  }
+
+  if (!strcasecmp(raw.c_str(), "true")) {
+    return true;
+  }
+  if (!strcasecmp(raw.c_str(), "false")) {
+    return true;
+  }
+  return false;
+}
+
+bool str_to_bool(const std::string& raw) {
+  if (!strcasecmp(raw.c_str(), "true")) {
+    return true;
+  }
+
+  return false;
+}
+
+ConfigurationLoader::ConfigurationLoader() {
+  // In order to create a configuration loader with the default search path
+  // ("$HADOOP_CONF_DIR" or "/etc/hadoop/conf"), we call SetDefaultSearchPath().
+  ConfigurationLoader::SetDefaultSearchPath();
+}
+
+void ConfigurationLoader::SetDefaultSearchPath() {
+  // Try (in order, taking the first valid one):
+  //    $HADOOP_CONF_DIR
+  //    /etc/hadoop/conf
+  const char * hadoop_conf_dir_env = getenv("HADOOP_CONF_DIR");
+  if (hadoop_conf_dir_env) {
+    std::stringstream ss(hadoop_conf_dir_env);
+    std::string path;
+    while (std::getline(ss, path, kSearchPathSeparator)) {
+      AddToSearchPath(path);
+    }
+  } else {
+    AddToSearchPath("/etc/hadoop/conf");
+  }
+}
+
+void ConfigurationLoader::ClearSearchPath()
+{
+  search_path_.clear();
+}
+
+void ConfigurationLoader::SetSearchPath(const std::string & searchPath)
+{
+  search_path_.clear();
+
+  std::vector<std::string> paths;
+  std::string::size_type start = 0;
+  std::string::size_type end = searchPath.find(kSearchPathSeparator);
+
+  while (end != std::string::npos) {
+     paths.push_back(searchPath.substr(start, end-start));
+     start = ++end;
+     end = searchPath.find(kSearchPathSeparator, start);
+  }
+  paths.push_back(searchPath.substr(start, searchPath.length()));
+
+  for (auto path: paths) {
+    AddToSearchPath(path);
+  }
+
+}
+
+void ConfigurationLoader::AddToSearchPath(const std::string & searchPath)
+{
+  if (searchPath.empty())
+    return;
+
+  if (searchPath.back() != kFileSeparator) {
+    std::string pathWithSlash(searchPath);
+    pathWithSlash += kFileSeparator;
+    search_path_.push_back(pathWithSlash);
+  } else {
+    search_path_.push_back(searchPath);
+  }
+}
+
+std::string ConfigurationLoader::GetSearchPath()
+{
+  std::stringstream result;
+  bool first = true;
+  for(std::string item: search_path_) {
+    if (!first) {
+      result << kSearchPathSeparator;
+    }
+
+    result << item;
+    first = false;
+  }
+
+  return result.str();
+}
+
+Status validateStream(std::istream & stream) {
+  std::streampos start = stream.tellg();
+  stream.seekg(0, std::ios::end);
+  std::streampos end = stream.tellg();
+  stream.seekg(start, std::ios::beg);
+
+  int length = end - start;
+
+  if (length <= 0 || start == -1 || end == -1)
+    return Status::Error("The configuration file is empty");
+
+  LOG_DEBUG(kFileSystem, << "validateStream will read a config file of length " << length);
+
+  std::vector<char> raw_bytes((int64_t)length + 1);
+  stream.read(&raw_bytes[0], length);
+  raw_bytes[length] = 0;
+
+  try {
+    rapidxml::xml_document<> dom;
+    dom.parse<rapidxml::parse_trim_whitespace|rapidxml::parse_validate_closing_tags>(&raw_bytes[0]);
+
+    /* File must contain a single <configuration> stanza */
+    auto config_node = dom.first_node("configuration", 0, false);
+    if (!config_node) {
+      return Status::Error("The configuration file is missing a 'configuration' tag");
+    }
+    return Status::OK();
+  } catch (const rapidxml::parse_error &e) {
+    size_t location = e.where<char>() - &raw_bytes[0];
+    std::string msg = "The configuration file has invalid xml around character " + std::to_string(location);
+    return Status::Error(msg.c_str());
+  }
+}
+
+std::vector<std::pair<std::string, Status> > ConfigurationLoader::ValidateResources(std::vector<std::string> filenames) const
+{
+  std::vector<std::pair<std::string, Status> > stats;
+  bool found;
+  for(auto file: filenames) {
+    found = false;
+    for(auto dir: search_path_) {
+      std::ifstream stream(dir + file);
+      if ( stream.is_open() ) {
+        found = true;
+        stats.push_back(std::make_pair(file,validateStream(stream)));
+      } else {
+        LOG_DEBUG(kFileSystem, << dir << file << " was not found");
+      }
+    }
+    if(!found) {
+      std::string msg("No directory in the current search path contains the file [" + file + "]");
+      stats.push_back(std::make_pair(file,Status::PathNotFound(msg.c_str())));
+    }
+  }
+  return stats;
+}
+
+bool ConfigurationLoader::UpdateMapWithFile(ConfigMap & map, const std::string & path) const
+{
+  if (path.front() == kFileSeparator) { // Absolute path
+    std::ifstream stream(path, std::ifstream::in);
+    if ( stream.is_open() ) {
+      return UpdateMapWithStream(map, stream);
+    } else {
+      return false;
+    }
+  } else { // Use search path
+    for(auto dir: search_path_) {
+      std::ifstream stream(dir + path);
+      if ( stream.is_open() ) {
+        if (UpdateMapWithStream(map, stream))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool ConfigurationLoader::UpdateMapWithStream(ConfigMap & map,
+                                              std::istream & stream) {
+  std::streampos start = stream.tellg();
+  stream.seekg(0, std::ios::end);
+  std::streampos end = stream.tellg();
+  stream.seekg(start, std::ios::beg);
+
+  int length = end - start;
+
+  if (length <= 0 || start == -1 || end == -1)
+    return false;
+
+  std::vector<char> raw_bytes((int64_t)length + 1);
+  stream.read(&raw_bytes[0], length);
+  raw_bytes[length] = 0;
+
+  return UpdateMapWithBytes(map, raw_bytes);
+}
+
+bool ConfigurationLoader::UpdateMapWithString(ConfigMap & map,
+                                                   const std::string &xml_data) {
+  if (xml_data.size() == 0) {
+    return false;
+  }
+
+  std::vector<char> raw_bytes(xml_data.begin(), xml_data.end());
+  raw_bytes.push_back('\0');
+
+  bool success = UpdateMapWithBytes(map, raw_bytes);
+
+  if (success) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
+bool ConfigurationLoader::UpdateMapWithBytes(ConfigMap& map,
+                                                 std::vector<char>& raw_bytes) {
+  try {
+    rapidxml::xml_document<> dom;
+    dom.parse<rapidxml::parse_trim_whitespace>(&raw_bytes[0]);
+
+    /* File must contain a single <configuration> stanza */
+    auto config_node = dom.first_node("configuration", 0, false);
+    if (!config_node) {
+      return false;
+    }
+
+    /* Walk all of the <property> nodes, ignoring the rest */
+    for (auto property_node = config_node->first_node("property", 0, false);
+         property_node;
+         property_node = property_node->next_sibling("property", 0, false)) {
+      auto name_node = property_node->first_node("name", 0, false);
+      auto value_node = property_node->first_node("value", 0, false);
+
+      if (name_node && value_node) {
+        std::string final_value;
+        auto final_node = property_node->first_node("final", 0, false);
+        if (final_node) {
+          final_value = final_node->value();
+        }
+        UpdateMapWithValue(map, name_node->value(), value_node->value(), final_value);
+      }
+
+      auto name_attr = property_node->first_attribute("name", 0, false);
+      auto value_attr = property_node->first_attribute("value", 0, false);
+
+      if (name_attr && value_attr) {
+        std::string final_value;
+        auto final_attr = property_node->first_attribute("final", 0, false);
+        if (final_attr) {
+          final_value = final_attr->value();
+        }
+        UpdateMapWithValue(map, name_attr->value(), value_attr->value(), final_value);
+      }
+    }
+
+    return true;
+  } catch (const rapidxml::parse_error &e) {
+    // TODO: Capture the result in a Status object
+    return false;
+  }
+}
+
+bool ConfigurationLoader::UpdateMapWithValue(ConfigMap& map,
+                                             const std::string& key, const std::string& value,
+                                             const std::string& final_text)
+{
+  std::string caseFixedKey = Configuration::fixCase(key);
+  auto mapValue = map.find(caseFixedKey);
+  if (mapValue != map.end() && mapValue->second.final) {
+    return false;
+  }
+
+  bool final_value = false;
+  if (is_valid_bool(final_text)) {
+    final_value = str_to_bool(final_text);
+  }
+
+  map[caseFixedKey].value = value;
+  map[caseFixedKey].final = final_value;
+  return true;
+}
+
+}

+ 138 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.h

@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_CONFIGURATION_BUILDER_H_
+#define COMMON_CONFIGURATION_BUILDER_H_
+
+#include "configuration.h"
+#include "hdfspp/status.h"
+
+namespace hdfs {
+
+
+class ConfigurationLoader {
+public:
+  // Creates a new, empty Configuration object
+  // T must be Configuration or a subclass
+  template<class T>
+  T NewConfig();
+
+  /****************************************************************************
+   *                    LOADING CONFIG FILES
+   ***************************************************************************/
+
+  // Loads Configuration XML contained in a string/stream/file and returns a parsed
+  //    Configuration object.
+  //    T must be Configuration or a subclass
+  template<class T>
+  optional<T> Load(const std::string &xml_data);
+  // Streams must be seekable
+  template<class T>
+  optional<T> LoadFromStream(std::istream & stream);
+  // The ConfigurationLoader's search path will be searched for the filename
+  //    unless it is an absolute path
+  template<class T>
+  optional<T> LoadFromFile(const std::string &filename);
+
+  // Loads Configuration XML contained in a string and produces a new copy that
+  //    is the union of the src and xml_data
+  //    Any parameters from src will be overwritten by the xml_data unless they
+  //    are marked as "final" in src.
+  //    T must be Configuration or a subclass
+  template<class T>
+  optional<T> OverlayResourceString(const T &src, const std::string &xml_data) const;
+  // Streams must be seekable
+  template<class T>
+  optional<T> OverlayResourceStream(const T &src, std::istream &stream) const;
+  //    The ConfigurationLoader's search path will be searched for the filename
+  //       unless it is an absolute path
+  template<class T>
+  optional<T> OverlayResourceFile(const T &src, const std::string &path) const;
+
+  // Attempts to update the map.  If the update failed (because there was
+  // an existing final value, for example), returns the original map
+  template<class T>
+  optional<T> OverlayValue(const T &src, const std::string &key, const std::string &value) const;
+
+  // Returns an instance of the Configuration with all of the default resource
+  //    files loaded.
+  //    T must be Configuration or a subclass
+  template<class T>
+  optional<T> LoadDefaultResources();
+
+
+  // Returns a vector of filenames and the corresponding status when validation is attempted.
+  //    If the files can be successfully validated, then the status returned for that file is Status::OK
+  //    The files that are validated are those returned by T::GetDefaultFilenames().
+  //    T must be Configuration or a subclass
+  template<class T>
+  std::vector<std::pair<std::string, Status>> ValidateDefaultResources() const;
+
+  /****************************************************************************
+   *                    SEARCH PATH METHODS
+   ***************************************************************************/
+
+  //Creates a configuration loader with the default search path ("$HADOOP_CONF_DIR" or "/etc/hadoop/conf").
+  //If you want to explicitly set the entire search path, call ClearSearchPath() first
+  ConfigurationLoader();
+
+  // Sets the search path to the default search path (namely, "$HADOOP_CONF_DIR" or "/etc/hadoop/conf")
+  void SetDefaultSearchPath();
+
+  // Clears out the search path
+  void ClearSearchPath();
+  // Sets the search path to ":"-delimited paths
+  void SetSearchPath(const std::string & searchPath);
+  // Adds an element to the search path
+  void AddToSearchPath(const std::string & searchPath);
+  // Returns the search path in ":"-delimited form
+  std::string GetSearchPath();
+
+protected:
+  using ConfigMap = Configuration::ConfigMap;
+
+  std::vector<std::pair<std::string, Status>> ValidateResources(std::vector<std::string> filenames) const;
+
+  // Updates the src map with data from the XML in the path
+  //   The search path will be searched for the filename
+  bool UpdateMapWithFile(ConfigMap & map, const std::string & path) const;
+
+  // Updates the src map with data from the XML in the stream
+  //   The stream must be seekable
+  static bool UpdateMapWithStream(ConfigMap & map,
+                                  std::istream & stream);
+  // Updates the src map with data from the XML
+  static bool UpdateMapWithString(Configuration::ConfigMap & src,
+                                  const std::string &xml_data);
+  // Updates the src map with data from the XML
+  static bool UpdateMapWithBytes(Configuration::ConfigMap &map,
+                                 std::vector<char> &raw_bytes);
+
+  // Attempts to update the map.  If the update failed (because there was
+  // an existing final value, for example), returns false
+  static bool UpdateMapWithValue(ConfigMap& map,
+        const std::string& key, const std::string& value, const std::string& final_text);
+
+  std::vector<std::string> search_path_;
+};
+
+}
+
+#include "configuration_loader_impl.h"
+
+#endif
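
The overlay semantics described above (values marked "final" in the base resource win over later overlays) can be exercised with a small sketch like the following; it is not part of the patch and the key and values are invented.

    #include "common/configuration_loader.h"

    #include <iostream>
    #include <string>

    int main() {
      hdfs::ConfigurationLoader loader;

      // Base resource marks dfs.replication as final.
      const std::string base =
          "<configuration>"
          "  <property><name>dfs.replication</name><value>3</value><final>true</final></property>"
          "</configuration>";
      auto conf = loader.Load<hdfs::Configuration>(base);
      if (!conf) return 1;

      // The overlay tries to lower the value, but the final value is kept and
      // the original map is returned unchanged.
      auto overlaid = loader.OverlayValue(*conf, "dfs.replication", "1");
      if (!overlaid) return 1;
      std::cout << overlaid->GetWithDefault("dfs.replication", "") << "\n";  // prints 3
      return 0;
    }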

+ 122 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader_impl.h

@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_CONFIGURATION_BUILDER_IMPL_H_
+#define COMMON_CONFIGURATION_BUILDER_IMPL_H_
+
+namespace hdfs {
+
+
+template<class T>
+T ConfigurationLoader::NewConfig() {
+  return T();
+}
+
+template<class T>
+optional<T> ConfigurationLoader::Load(const std::string &xml_data) {
+  return OverlayResourceString<T>(T(), xml_data);
+}
+template<class T>
+optional<T> ConfigurationLoader::LoadFromStream(std::istream &stream) {
+  return OverlayResourceStream<T>(T(), stream);
+}
+template<class T>
+optional<T> ConfigurationLoader::LoadFromFile(const std::string &path) {
+  return OverlayResourceFile<T>(T(), path);
+}
+
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayResourceFile(const T& src, const std::string &path) const {
+  ConfigMap map(src.raw_values_);
+  bool success = UpdateMapWithFile(map, path);
+
+  if (success) {
+    return std::experimental::make_optional<T>(map);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayResourceStream(const T& src, std::istream & stream) const {
+  ConfigMap map(src.raw_values_);
+  bool success = UpdateMapWithStream(map, stream);
+
+  if (success) {
+    return std::experimental::make_optional<T>(map);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayResourceString(const T& src, const std::string &xml_data) const {
+  if (xml_data.size() == 0) {
+    return optional<T>();
+  }
+
+  std::vector<char> raw_bytes(xml_data.begin(), xml_data.end());
+  raw_bytes.push_back('\0');
+
+  ConfigMap map(src.raw_values_);
+  bool success = UpdateMapWithBytes(map, raw_bytes);
+
+  if (success) {
+    return std::experimental::make_optional<T>(map);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayValue(const T& src, const std::string &key, const std::string &value) const {
+  ConfigMap map(src.raw_values_);
+  UpdateMapWithValue(map, key, value, "");
+
+  return std::experimental::make_optional<T>(map);
+}
+
+template <class T>
+optional<T> ConfigurationLoader::LoadDefaultResources() {
+  std::vector<std::string> default_filenames = T::GetDefaultFilenames();
+
+  ConfigMap result;
+  bool success = false;
+
+  for (auto fn: default_filenames) {
+    // We succeed if we have loaded data from any file
+    success |= UpdateMapWithFile(result, fn);
+  }
+
+  if (success) {
+    return std::experimental::make_optional<T>(result);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+std::vector<std::pair<std::string, Status> > ConfigurationLoader::ValidateDefaultResources() const{
+  return ValidateResources(T::GetDefaultFilenames());
+}
+
+
+}
+
+#endif

+ 55 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/content_summary.cc

@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hdfspp/content_summary.h>
+#include <sstream>
+#include <iomanip>
+
+namespace hdfs {
+
+ContentSummary::ContentSummary()
+: length(0),
+  filecount(0),
+  directorycount(0),
+  quota(0),
+  spaceconsumed(0),
+  spacequota(0) {
+}
+
+std::string ContentSummary::str(bool include_quota) const {
+  std::stringstream ss;
+  if(include_quota){
+    ss  << this->quota << " "
+        << spacequota << " "
+        << spaceconsumed << " ";
+  }
+  ss  << directorycount << " "
+      << filecount << " "
+      << length << " "
+      << path;
+  return ss.str();
+}
+
+std::string ContentSummary::str_du() const {
+  std::stringstream ss;
+  ss  << std::left << std::setw(10) << length
+      << path;
+  return ss.str();
+}
+
+}

+ 65 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/asio.h

@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_CONTINUATION_ASIO_H_
+#define LIB_COMMON_CONTINUATION_ASIO_H_
+
+#include "continuation.h"
+#include "common/util.h"
+
+#include "hdfspp/status.h"
+
+#include <asio/connect.hpp>
+#include <asio/read.hpp>
+#include <asio/write.hpp>
+#include <asio/ip/tcp.hpp>
+#include <memory>
+
+namespace hdfs {
+namespace asio_continuation {
+
+using namespace continuation;
+
+template <class Stream, class ConstBufferSequence>
+class WriteContinuation : public Continuation {
+public:
+  WriteContinuation(std::shared_ptr<Stream>& stream, const ConstBufferSequence &buffer)
+      : stream_(stream), buffer_(buffer) {}
+
+  virtual void Run(const Next &next) override {
+    auto handler =
+        [next](const asio::error_code &ec, size_t) { next(ToStatus(ec)); };
+    asio::async_write(*stream_, buffer_, handler);
+  }
+
+private:
+  // prevent construction from raw ptr
+  WriteContinuation(Stream *stream, ConstBufferSequence &buffer);
+  std::shared_ptr<Stream> stream_;
+  ConstBufferSequence buffer_;
+};
+
+template <class Stream, class ConstBufferSequence>
+static inline Continuation *Write(std::shared_ptr<Stream> stream,
+                                  const ConstBufferSequence &buffer) {
+  return new WriteContinuation<Stream, ConstBufferSequence>(stream, buffer);
+}
+
+}
+}
+
+#endif

+ 137 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/continuation.h

@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_CONTINUATION_CONTINUATION_H_
+#define LIB_COMMON_CONTINUATION_CONTINUATION_H_
+
+#include "hdfspp/status.h"
+#include "common/cancel_tracker.h"
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+namespace hdfs {
+namespace continuation {
+
+class PipelineBase;
+
+/**
+ * A continuation is a fragment of runnable code whose execution will
+ * be scheduled by a \link Pipeline \endlink.
+ *
+ * The Continuation class is a building block to implement the
+ * Continuation Passing Style (CPS) in libhdfs++. In CPS, the
+ * upper-level user specifies the control flow by chaining a sequence
+ * of continuations explicitly through the \link Run() \endlink method,
+ * while in traditional imperative programming the sequences of
+ * sentences implicitly specify the control flow.
+ *
+ * See http://en.wikipedia.org/wiki/Continuation for more details.
+ **/
+class Continuation {
+public:
+  typedef std::function<void(const Status &)> Next;
+  virtual ~Continuation() = default;
+  virtual void Run(const Next &next) = 0;
+  Continuation(const Continuation &) = delete;
+  Continuation &operator=(const Continuation &) = delete;
+
+protected:
+  Continuation() = default;
+};
+
+/**
+ * A pipeline schedules the execution of a chain of \link Continuation
+ * \endlink. The pipeline schedules the execution of continuations
+ * based on their order in the pipeline, where the next parameter for
+ * each continuation points to the \link Schedule() \endlink
+ * method. That way the pipeline executes all scheduled continuations
+ * in sequence.
+ *
+ * The typical use case of a pipeline is executing continuations
+ * asynchronously. Note that a continuation calls the next
+ * continuation when it is finished. If the continuation is posted
+ * into an asynchronous event loop, invoking the next continuation
+ * can be done in the callback handler in the asynchronous event loop.
+ *
+ * The pipeline allocates the memory as follows. A pipeline is always
+ * allocated on the heap. It owns all the continuations as well as
+ * the state specified by the user. Both the continuations and the
+ * state have the same life cycle as the pipeline. The design
+ * simplifies the problem of ensuring that the executions in the
+ * asynchronous event loop always hold valid pointers w.r.t. the
+ * pipeline. The pipeline will automatically deallocate itself right
+ * after it invokes the callback specified by the user.
+ **/
+template <class State> class Pipeline {
+public:
+  typedef std::function<void(const Status &, const State &)> UserHandler;
+  static Pipeline *Create() { return new Pipeline(); }
+  static Pipeline *Create(CancelHandle cancel_handle) {
+    return new Pipeline(cancel_handle);
+  }
+  Pipeline &Push(Continuation *stage);
+  void Run(UserHandler &&handler);
+  State &state() { return state_; }
+
+private:
+  State state_;
+  std::vector<std::unique_ptr<Continuation>> routines_;
+  size_t stage_;
+  std::function<void(const Status &, const State &)> handler_;
+
+  Pipeline() : stage_(0), cancel_handle_(CancelTracker::New()) {}
+  Pipeline(CancelHandle cancel_handle) : stage_(0), cancel_handle_(cancel_handle) {}
+  ~Pipeline() = default;
+  void Schedule(const Status &status);
+  CancelHandle cancel_handle_;
+};
+
+template <class State>
+inline Pipeline<State> &Pipeline<State>::Push(Continuation *stage) {
+  routines_.emplace_back(std::unique_ptr<Continuation>(stage));
+  return *this;
+}
+
+template <class State>
+inline void Pipeline<State>::Schedule(const Status &status) {
+  // catch cancellation signalled from outside of the pipeline
+  if(cancel_handle_->is_canceled()) {
+    handler_(Status::Canceled(), state_);
+    routines_.clear();
+    delete this;
+  } else if (!status.ok() || stage_ >= routines_.size()) {
+    handler_(status, state_);
+    routines_.clear();
+    delete this;
+  } else {
+    auto next = routines_[stage_].get();
+    ++stage_;
+    next->Run(std::bind(&Pipeline::Schedule, this, std::placeholders::_1));
+  }
+}
+
+template <class State> inline void Pipeline<State>::Run(UserHandler &&handler) {
+  handler_ = std::move(handler);
+  Schedule(Status::OK());
+}
+
+}
+}
+
+#endif
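
A minimal sketch (not part of the patch) of chaining work through the Pipeline described above; PrintStage and the empty NoState struct are invented for illustration.

    #include "common/continuation/continuation.h"

    #include <iostream>

    namespace continuation = hdfs::continuation;
    using hdfs::Status;

    // A trivial stage: print a message, then hand control to the next stage.
    class PrintStage : public continuation::Continuation {
    public:
      explicit PrintStage(const char *msg) : msg_(msg) {}
      void Run(const Next &next) override {
        std::cout << msg_ << "\n";
        next(Status::OK());
      }
    private:
      const char *msg_;
    };

    struct NoState {};  // this example needs no shared pipeline state

    int main() {
      // Per the docblock, the pipeline is heap-allocated and deletes itself
      // after invoking the user handler, so there is no explicit delete.
      auto *pipeline = continuation::Pipeline<NoState>::Create();
      pipeline->Push(new PrintStage("stage 1"))
               .Push(new PrintStage("stage 2"));
      pipeline->Run([](const Status &status, const NoState &) {
        std::cout << "pipeline finished, ok=" << status.ok() << "\n";
      });
      return 0;
    }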

+ 129 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/protobuf.h

@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_COMMON_CONTINUATION_PROTOBUF_H_
+#define LIBHDFSPP_COMMON_CONTINUATION_PROTOBUF_H_
+
+#include "common/util.h"
+
+#include <asio/read.hpp>
+
+#include <google/protobuf/message_lite.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+#include <cassert>
+
+namespace hdfs {
+namespace continuation {
+
+template <class Stream, size_t MaxMessageSize = 512>
+struct ReadDelimitedPBMessageContinuation : public Continuation {
+  ReadDelimitedPBMessageContinuation(std::shared_ptr<Stream> stream,
+                                     ::google::protobuf::MessageLite *msg)
+      : stream_(stream), msg_(msg) {}
+
+  virtual void Run(const Next &next) override {
+    namespace pbio = google::protobuf::io;
+    auto handler = [this, next](const asio::error_code &ec, size_t) {
+      Status status;
+      if (ec) {
+        status = ToStatus(ec);
+      } else {
+        pbio::ArrayInputStream as(&buf_[0], buf_.size());
+        pbio::CodedInputStream is(&as);
+        uint32_t size = 0;
+        bool v = is.ReadVarint32(&size);
+        assert(v);
+        (void)v; //avoids unused variable warning
+        is.PushLimit(size);
+        msg_->Clear();
+        v = msg_->MergeFromCodedStream(&is);
+        assert(v);
+      }
+      next(status);
+    };
+    asio::async_read(*stream_,
+        asio::buffer(buf_),
+        std::bind(&ReadDelimitedPBMessageContinuation::CompletionHandler, this,
+                  std::placeholders::_1, std::placeholders::_2),
+        handler);
+  }
+
+private:
+  size_t CompletionHandler(const asio::error_code &ec, size_t transferred) {
+    if (ec) {
+      return 0;
+    }
+
+    size_t offset = 0, len = 0;
+    for (size_t i = 0; i + 1 < transferred && i < sizeof(int32_t); ++i) {
+      len = (len << 7) | (buf_[i] & 0x7f);
+      if ((uint8_t)buf_.at(i) < 0x80) {
+        offset = i + 1;
+        break;
+      }
+    }
+
+    assert(offset + len < buf_.size() && "Message is too big");
+    return offset ? len + offset - transferred : 1;
+  }
+
+  std::shared_ptr<Stream> stream_;
+  ::google::protobuf::MessageLite *msg_;
+  std::array<char, MaxMessageSize> buf_;
+};
+
+template <class Stream>
+struct WriteDelimitedPBMessageContinuation : Continuation {
+  WriteDelimitedPBMessageContinuation(std::shared_ptr<Stream> stream,
+                                      const google::protobuf::MessageLite *msg)
+      : stream_(stream), msg_(msg) {}
+
+  virtual void Run(const Next &next) override {
+    bool success = true;
+    buf_ = SerializeDelimitedProtobufMessage(msg_, &success);
+
+    if(!success) {
+      next(Status::Error("Unable to serialize protobuf message."));
+      return;
+    }
+
+    asio::async_write(*stream_, asio::buffer(buf_), [next](const asio::error_code &ec, size_t) { next(ToStatus(ec)); } );
+  }
+
+private:
+  std::shared_ptr<Stream> stream_;
+  const google::protobuf::MessageLite *msg_;
+  std::string buf_;
+};
+
+template <class Stream, size_t MaxMessageSize = 512>
+static inline Continuation *
+ReadDelimitedPBMessage(std::shared_ptr<Stream> stream, ::google::protobuf::MessageLite *msg) {
+  return new ReadDelimitedPBMessageContinuation<Stream, MaxMessageSize>(stream,
+                                                                        msg);
+}
+
+template <class Stream>
+static inline Continuation *
+WriteDelimitedPBMessage(std::shared_ptr<Stream> stream, ::google::protobuf::MessageLite *msg) {
+  return new WriteDelimitedPBMessageContinuation<Stream>(stream, msg);
+}
+}
+}
+#endif

+ 61 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/fsinfo.cc

@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hdfspp/fsinfo.h>
+#include <sstream>
+#include <iomanip>
+
+namespace hdfs {
+
+FsInfo::FsInfo()
+  : capacity(0),
+    used(0),
+    remaining(0),
+    under_replicated(0),
+    corrupt_blocks(0),
+    missing_blocks(0),
+    missing_repl_one_blocks(0),
+    blocks_in_future(0) {
+}
+
+std::string FsInfo::str(const std::string fs_name) const {
+  std::string fs_name_label = "Filesystem";
+  std::string size = std::to_string(capacity);
+  std::string size_label = "Size";
+  std::string used = std::to_string(this->used);
+  std::string used_label = "Used";
+  std::string available = std::to_string(remaining);
+  std::string available_label = "Available";
+  std::string use_percentage = std::to_string(this->used * 100 / capacity) + "%";
+  std::string use_percentage_label = "Use%";
+  std::stringstream ss;
+  ss  << std::left << std::setw(std::max(fs_name.size(), fs_name_label.size())) << fs_name_label
+      << std::right << std::setw(std::max(size.size(), size_label.size()) + 2) << size_label
+      << std::right << std::setw(std::max(used.size(), used_label.size()) + 2) << used_label
+      << std::right << std::setw(std::max(available.size(), available_label.size()) + 2) << available_label
+      << std::right << std::setw(std::max(use_percentage.size(), use_percentage_label.size()) + 2) << use_percentage_label
+      << std::endl
+      << std::left << std::setw(std::max(fs_name.size(), fs_name_label.size())) << fs_name
+      << std::right << std::setw(std::max(size.size(), size_label.size()) + 2) << size
+      << std::right << std::setw(std::max(used.size(), used_label.size()) + 2) << used
+      << std::right << std::setw(std::max(available.size(), available_label.size()) + 2) << available
+      << std::right << std::setw(std::max(use_percentage.size(), use_percentage_label.size()) + 2) << use_percentage;
+  return ss.str();
+}
+
+}

+ 210 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.cc

@@ -0,0 +1,210 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/hdfs_configuration.h"
+#include "common/logging.h"
+
+#include <exception>
+
+#ifndef DEFAULT_SCHEME
+  #define DEFAULT_SCHEME "hdfs://"
+#endif
+
+namespace hdfs {
+
+// Constructs a configuration with no search path and no resources loaded
+HdfsConfiguration::HdfsConfiguration() : Configuration() {}
+
+// Constructs a configuration with a copy of the input data
+HdfsConfiguration::HdfsConfiguration(ConfigMap &src_map) : Configuration(src_map) {}
+HdfsConfiguration::HdfsConfiguration(const ConfigMap &src_map) : Configuration(src_map) {}
+
+std::vector<std::string> HdfsConfiguration::GetDefaultFilenames() {
+  auto result = Configuration::GetDefaultFilenames();
+  result.push_back("hdfs-site.xml");
+  return result;
+}
+
+// Sets a value iff the optional<T> has a value
+template <class T, class U>
+void OptionalSet(T& target, optional<U> value) {
+  if (value)
+    target = *value;
+}
+
+std::vector<std::string> SplitOnComma(const std::string &s, bool include_empty_strings) {
+  std::vector<std::string> res;
+  std::string buf;
+
+  for(unsigned int i=0;i<s.size();i++) {
+    char c = s[i];
+    if(c != ',') {
+      buf += c;
+    } else {
+      if(!include_empty_strings && buf.empty()) {
+        // Skip adding empty strings if needed
+        continue;
+      }
+      res.push_back(buf);
+      buf.clear();
+    }
+  }
+
+  if(buf.size() > 0)
+    res.push_back(buf);
+
+  return res;
+}
+
+std::string RemoveSpaces(const std::string &str) {
+  std::string res;
+  for(unsigned int i=0; i<str.size(); i++) {
+    char curr = str[i];
+    if(curr != ' ') {
+      res += curr;
+    }
+  }
+  return res;
+}
+
+// Prepend hdfs:// to string if there isn't already a scheme
+// Converts unset optional into empty string
+std::string PrependHdfsScheme(optional<std::string> str) {
+  if(!str)
+    return "";
+
+  if(str.value().find("://") == std::string::npos)
+    return DEFAULT_SCHEME + str.value();
+  return str.value();
+}
+
+// It's either use this, goto, or a lot of returns w/ status checks
+struct ha_parse_error : public std::exception {
+  std::string desc;
+  ha_parse_error(const std::string &val) : desc(val) {};
+  const char *what() const noexcept override  {
+    return desc.c_str();
+  };
+};
+
+std::vector<NamenodeInfo> HdfsConfiguration::LookupNameService(const std::string &nameservice) {
+  LOG_TRACE(kRPC, << "HDFSConfiguration@" << this << "::LookupNameService( nameservice=" << nameservice << " ) called");
+
+  std::vector<NamenodeInfo> namenodes;
+  try {
+    // Find namenodes that belong to nameservice
+    std::vector<std::string> namenode_ids;
+    {
+      std::string service_nodes = std::string("dfs.ha.namenodes.") + nameservice;
+      optional<std::string> namenode_list = Get(service_nodes);
+      if(namenode_list)
+        namenode_ids = SplitOnComma(namenode_list.value(), false);
+      else
+        throw ha_parse_error("unable to find " + service_nodes);
+
+      for(unsigned int i=0; i<namenode_ids.size(); i++) {
+        namenode_ids[i] = RemoveSpaces(namenode_ids[i]);
+        LOG_INFO(kRPC, << "Namenode: " << namenode_ids[i]);
+      }
+    }
+
+    // should this error if we only find 1 NN?
+    if(namenode_ids.empty())
+      throw ha_parse_error("No namenodes found for nameservice " + nameservice);
+
+    // Get URI for each HA namenode
+    for(auto node_id=namenode_ids.begin(); node_id != namenode_ids.end(); node_id++) {
+      // find URI
+      std::string dom_node_name = std::string("dfs.namenode.rpc-address.") + nameservice + "." + *node_id;
+
+      URI uri;
+      try {
+        uri = URI::parse_from_string(PrependHdfsScheme(Get(dom_node_name)));
+      } catch (const uri_parse_error &) {
+        throw ha_parse_error("unable to find " + dom_node_name);
+      }
+
+      if(uri.str() == "") {
+        LOG_WARN(kRPC, << "Attempted to read info for nameservice " << nameservice << " node " << dom_node_name << " but didn't find anything.")
+      } else {
+        LOG_INFO(kRPC, << "Read the following HA Namenode URI from config: " << uri.GetDebugString());
+      }
+
+      NamenodeInfo node(nameservice, *node_id, uri);
+      namenodes.push_back(node);
+    }
+  } catch (const ha_parse_error &e) {
+    LOG_ERROR(kRPC, << "HA cluster detected but failed because : " << e.what());
+    namenodes.clear(); // Don't return inconsistent view
+  }
+  return namenodes;
+}
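+
+// For reference, LookupNameService("mycluster") reads keys of this form from the
+// loaded resources (the nameservice ID and values shown here are hypothetical):
+//   dfs.ha.namenodes.mycluster = "nn1,nn2"
+//   dfs.namenode.rpc-address.mycluster.nn1 = "host1.example.com:8020"
+//   dfs.namenode.rpc-address.mycluster.nn2 = "host2.example.com:8020"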
+
+// Interprets the resources to build an Options object
+Options HdfsConfiguration::GetOptions() {
+  Options result;
+
+  OptionalSet(result.rpc_timeout, GetInt(kDfsClientSocketTimeoutKey));
+  OptionalSet(result.rpc_connect_timeout, GetInt(kIpcClientConnectTimeoutKey));
+  OptionalSet(result.max_rpc_retries, GetInt(kIpcClientConnectMaxRetriesKey));
+  OptionalSet(result.rpc_retry_delay_ms, GetInt(kIpcClientConnectRetryIntervalKey));
+  OptionalSet(result.defaultFS, GetUri(kFsDefaultFsKey));
+  OptionalSet(result.block_size, GetInt(kDfsBlockSizeKey));
+
+
+  OptionalSet(result.failover_max_retries, GetInt(kDfsClientFailoverMaxAttempts));
+  OptionalSet(result.failover_connection_max_retries, GetInt(kDfsClientFailoverConnectionRetriesOnTimeouts));
+
+  // Load all nameservices if it's HA configured
+  optional<std::string> dfs_nameservices = Get("dfs.nameservices");
+  if(dfs_nameservices) {
+    std::string nameservice = dfs_nameservices.value();
+
+    std::vector<std::string> all_services = SplitOnComma(nameservice, false);
+
+    // Look up nodes for each nameservice so that FileSystem object can support
+    // multiple nameservices by ID.
+    for(const std::string &service : all_services) {
+      if(service.empty())
+        continue;
+
+      LOG_DEBUG(kFileSystem, << "Parsing info for nameservice: " << service);
+      std::vector<NamenodeInfo> nodes = LookupNameService(service);
+      if(nodes.empty()) {
+        LOG_WARN(kFileSystem, << "Nameservice \"" << service << "\" declared in config but no namenodes could be found for it");
+      } else {
+        result.services[service] = nodes;
+      }
+    }
+  }
+
+  optional<std::string> authentication_value = Get(kHadoopSecurityAuthenticationKey);
+
+  if (authentication_value ) {
+      std::string fixed_case_value = fixCase(authentication_value.value());
+      if (fixed_case_value == fixCase(kHadoopSecurityAuthentication_kerberos))
+          result.authentication = Options::kKerberos;
+      else
+          result.authentication = Options::kSimple;
+  }
+
+  return result;
+}
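+
+// Hedged usage sketch: given an HdfsConfiguration already populated by a
+// ConfigurationLoader (loading details omitted), GetOptions yields both the
+// scalar settings and any HA nameservices that were found:
+//   Options opts = conf.GetOptions();
+//   // opts.services["mycluster"] ("mycluster" is a hypothetical ID) holds the
+//   // vector<NamenodeInfo> parsed by LookupNameService above.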
+
+
+}

+ 70 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.h

@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_CONFIGURATION_H_
+#define COMMON_HDFS_CONFIGURATION_H_
+
+#include "common/configuration.h"
+#include "hdfspp/options.h"
+
+#include <string>
+#include <map>
+#include <vector>
+#include <set>
+#include <istream>
+#include <stdint.h>
+
+namespace hdfs {
+
+class HdfsConfiguration : public Configuration {
+  public:
+    // Interprets the resources to build an Options object
+    Options GetOptions();
+
+    // Keys to look for in the configuration file
+    static constexpr const char * kFsDefaultFsKey = "fs.defaultFS";
+    static constexpr const char * kDfsClientSocketTimeoutKey = "dfs.client.socket-timeout";
+    static constexpr const char * kIpcClientConnectTimeoutKey = "ipc.client.connect.timeout";
+    static constexpr const char * kIpcClientConnectMaxRetriesKey = "ipc.client.connect.max.retries";
+    static constexpr const char * kIpcClientConnectRetryIntervalKey = "ipc.client.connect.retry.interval";
+    static constexpr const char * kHadoopSecurityAuthenticationKey = "hadoop.security.authentication";
+    static constexpr const char * kHadoopSecurityAuthentication_simple = "simple";
+    static constexpr const char * kHadoopSecurityAuthentication_kerberos = "kerberos";
+    static constexpr const char * kDfsBlockSizeKey = "dfs.blocksize";
+
+    static constexpr const char * kDfsClientFailoverMaxAttempts = "dfs.client.failover.max.attempts";
+    static constexpr const char * kDfsClientFailoverConnectionRetriesOnTimeouts = "dfs.client.failover.connection.retries.on.timeouts";
+
+
+private:
+    friend class ConfigurationLoader;
+
+    // Constructs a configuration with no search path and no resources loaded
+    HdfsConfiguration();
+
+    // Constructs a configuration with some static data
+    HdfsConfiguration(ConfigMap &src_map);
+    HdfsConfiguration(const ConfigMap &src_map);
+
+    static std::vector<std::string> GetDefaultFilenames();
+    std::vector<NamenodeInfo> LookupNameService(const std::string &nameservice);
+};
+
+}
+
+#endif

+ 146 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.cc

@@ -0,0 +1,146 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfs_ioservice.h"
+
+#include <thread>
+#include <mutex>
+#include <vector>
+
+#include "common/logging.h"
+
+namespace hdfs {
+
+IoService::~IoService() {}
+
+IoService *IoService::New() {
+  return new IoServiceImpl();
+}
+
+std::shared_ptr<IoService> IoService::MakeShared() {
+  return std::make_shared<IoServiceImpl>();
+}
+
+
+unsigned int IoServiceImpl::InitDefaultWorkers() {
+  LOG_TRACE(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers@" << this << " called.");
+  unsigned int logical_thread_count = std::thread::hardware_concurrency();
+#ifndef DISABLE_CONCURRENT_WORKERS
+  if(logical_thread_count < 1) {
+    LOG_WARN(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers did not detect any logical processors.  Defaulting to 1 worker thread.");
+  } else {
+    LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers detected " << logical_thread_count << " logical threads and will spawn a worker for each.");
+  }
+#else
+  if(logical_thread_count > 0) {
+    LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers: " << logical_thread_count << " threads available.  Concurrent workers are disabled so 1 worker thread will be used");
+  }
+  logical_thread_count = 1;
+#endif
+  return InitWorkers(logical_thread_count);
+}
+
+unsigned int IoServiceImpl::InitWorkers(unsigned int thread_count) {
+#ifdef DISABLE_CONCURRENT_WORKERS
+  LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl::InitWorkers: " << thread_count << " threads specified but concurrent workers are disabled so 1 will be used");
+  thread_count = 1;
+#endif
+  unsigned int created_threads = 0;
+  for(unsigned int i=0; i<thread_count; i++) {
+    bool created = AddWorkerThread();
+    if(created) {
+      created_threads++;
+    } else {
+      LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl@" << this << " ::InitWorkers failed to create a worker thread");
+    }
+  }
+  if(created_threads != thread_count) {
+    LOG_WARN(kAsyncRuntime, << "IoServiceImpl@" << this << " ::InitWorkers attempted to create "
+                            << thread_count << " but only created " << created_threads
+                            << " worker threads.  Make sure this process has adequate resources.");
+  }
+  return created_threads;
+}
+
+bool IoServiceImpl::AddWorkerThread() {
+  mutex_guard state_lock(state_lock_);
+  auto async_worker = [this]() {
+    this->ThreadStartHook();
+    this->Run();
+    this->ThreadExitHook();
+  };
+  worker_threads_.push_back(WorkerPtr( new std::thread(async_worker)) );
+  return true;
+}
+
+
+void IoServiceImpl::ThreadStartHook() {
+  mutex_guard state_lock(state_lock_);
+  LOG_DEBUG(kAsyncRuntime, << "Worker thread #" << std::this_thread::get_id() << " for IoServiceImpl@" << this << " starting");
+}
+
+void IoServiceImpl::ThreadExitHook() {
+  mutex_guard state_lock(state_lock_);
+  LOG_DEBUG(kAsyncRuntime, << "Worker thread #" << std::this_thread::get_id() << " for IoServiceImpl@" << this << " exiting");
+}
+
+void IoServiceImpl::PostTask(std::function<void(void)>& asyncTask) {
+  io_service_.post(asyncTask);
+}
+
+void IoServiceImpl::WorkerDeleter::operator()(std::thread *t) {
+  // It is far too easy to destroy the filesystem (and thus the threadpool)
+  //     from within one of the worker threads, leading to a deadlock.  Let's
+  //     provide some explicit protection.
+  if(t->get_id() == std::this_thread::get_id()) {
+    LOG_ERROR(kAsyncRuntime, << "FileSystemImpl::WorkerDeleter::operator(treadptr="
+                             << t << ") : FATAL: Attempted to destroy a thread pool"
+                             "from within a callback of the thread pool!");
+  }
+  t->join();
+  delete t;
+}
+
+// As long as this just forwards to an asio::io_service method it doesn't need a lock
+void IoServiceImpl::Run() {
+  // The IoService executes callbacks provided by library users in the context of worker threads.
+  // There is no way to prevent those callbacks from throwing, but we can at least prevent them
+  // from escaping this library and crashing the process.
+
+  // As recommended in http://www.boost.org/doc/libs/1_39_0/doc/html/boost_asio/reference/io_service.html#boost_asio.reference.io_service.effect_of_exceptions_thrown_from_handlers
+  asio::io_service::work work(io_service_);
+  while(true)
+  {
+    try
+    {
+      io_service_.run();
+      break;
+    } catch (const std::exception & e) {
+      LOG_WARN(kFileSystem, << "Unexpected exception in libhdfspp worker thread: " << e.what());
+    } catch (...) {
+      LOG_WARN(kFileSystem, << "Unexpected value not derived from std::exception in libhdfspp worker thread");
+    }
+  }
+}
+
+unsigned int IoServiceImpl::get_worker_thread_count() {
+  mutex_guard state_lock(state_lock_);
+  return worker_threads_.size();
+}
+
+}

+ 79 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.h

@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_IOSERVICE_H_
+#define COMMON_HDFS_IOSERVICE_H_
+
+#include "hdfspp/hdfspp.h"
+
+#include <asio/io_service.hpp>
+#include "common/util.h"
+
+#include <mutex>
+#include <thread>
+
+namespace hdfs {
+
+// Uncomment this to determine if issues are due to concurrency or logic faults.
+// If tests still fail with concurrency disabled it's most likely a logic bug.
+//#define DISABLE_CONCURRENT_WORKERS
+
+/*
+ *  A thin wrapper over the asio::io_service with a few extras
+ *    -manages its own worker threads
+ *    -some helpers for sharing with multiple modules that need to do async work
+ */
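+//
+// Typical usage (sketch only; error handling omitted):
+//   auto io_service = std::make_shared<IoServiceImpl>();
+//   io_service->InitDefaultWorkers();              // or InitWorkers(n)
+//   std::function<void(void)> task = [](){ /* async work */ };
+//   io_service->PostTask(task);
+//   io_service->Stop();   // Run() returns in each worker; threads join when the
+//                         // IoServiceImpl is destroyed (see WorkerDeleter)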
+
+class IoServiceImpl : public IoService {
+ public:
+  IoServiceImpl() {}
+
+  virtual unsigned int InitDefaultWorkers() override;
+  virtual unsigned int InitWorkers(unsigned int thread_count) override;
+  virtual void PostTask(std::function<void(void)>& asyncTask) override;
+  virtual void Run() override;
+  virtual void Stop() override { io_service_.stop(); }
+
+  // Add a single worker thread, in the common case try to avoid this in favor
+  // of Init[Default]Workers. Public for use by tests and rare cases where a
+  // client wants very explicit control of threading for performance reasons
+  // e.g. pinning threads to NUMA nodes.
+  bool AddWorkerThread();
+
+  // Be very careful about using this: HDFS-10241
+  ::asio::io_service &io_service() { return io_service_; }
+  unsigned int get_worker_thread_count();
+ private:
+  std::mutex state_lock_;
+  ::asio::io_service io_service_;
+
+  // For doing logging + resource manager updates on thread start/exit
+  void ThreadStartHook();
+  void ThreadExitHook();
+
+  // Support for async worker threads
+  struct WorkerDeleter {
+    void operator()(std::thread *t);
+  };
+  typedef std::unique_ptr<std::thread, WorkerDeleter> WorkerPtr;
+  std::vector<WorkerPtr> worker_threads_;
+};
+
+}
+
+#endif

+ 89 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.cc

@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libhdfs_events_impl.h"
+
+#include <exception>
+
+namespace hdfs {
+
+/**
+ * Default no-op callback implementations
+ **/
+
+LibhdfsEvents::LibhdfsEvents() : fs_callback(std::experimental::nullopt),
+                                 file_callback(std::experimental::nullopt)
+{}
+
+LibhdfsEvents::~LibhdfsEvents() {}
+
+void LibhdfsEvents::set_fs_callback(const fs_event_callback & callback) {
+  fs_callback = callback;
+}
+
+void LibhdfsEvents::set_file_callback(const file_event_callback & callback) {
+  file_callback = callback;
+}
+
+void LibhdfsEvents::clear_fs_callback() {
+  fs_callback = std::experimental::nullopt;
+}
+
+void LibhdfsEvents::clear_file_callback() {
+  file_callback = std::experimental::nullopt;
+}
+
+event_response LibhdfsEvents::call(const char * event,
+                                   const char * cluster,
+                                   int64_t value)
+{
+  if (fs_callback) {
+    try {
+      return fs_callback->operator()(event, cluster, value);
+    } catch (const std::exception& e) {
+      return event_response::make_caught_std_exception(e.what());
+    } catch (...) {
+      // Arguably calling abort() here would serve as appropriate
+      // punishment for those who throw garbage that isn't derived
+      // from std::exception...
+      return event_response::make_caught_unknown_exception();
+    }
+  } else {
+    return event_response::make_ok();
+  }
+}
+
+event_response LibhdfsEvents::call(const char * event,
+                                   const char * cluster,
+                                   const char * file,
+                                   int64_t value)
+{
+  if (file_callback) {
+    try {
+      return file_callback->operator()(event, cluster, file, value);
+    } catch (const std::exception& e) {
+      return event_response::make_caught_std_exception(e.what());
+    } catch (...) {
+      return event_response::make_caught_unknown_exception();
+    }
+  } else {
+    return event_response::make_ok();
+  }
+}
+
+}

+ 59 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.h

@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_COMMON_LIBHDFSEVENTS_IMPL
+#define LIBHDFSPP_COMMON_LIBHDFSEVENTS_IMPL
+
+#include "hdfspp/events.h"
+#include "common/optional_wrapper.h"
+
+#include <functional>
+
+namespace hdfs {
+
+/**
+ * Users can specify event handlers.  Default is a no-op handler.
+ **/
+class LibhdfsEvents {
+public:
+  LibhdfsEvents();
+  virtual ~LibhdfsEvents();
+
+  void set_fs_callback(const fs_event_callback & callback);
+  void set_file_callback(const file_event_callback & callback);
+  void clear_fs_callback();
+  void clear_file_callback();
+
+  event_response call(const char *event,
+                      const char *cluster,
+                      int64_t value);
+
+  event_response call(const char *event,
+                      const char *cluster,
+                      const char *file,
+                      int64_t value);
+private:
+  // Called when fs events occur
+  std::experimental::optional<fs_event_callback> fs_callback;
+
+  // Called when file events occur
+  std::experimental::optional<file_event_callback> file_callback;
+};
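+
+// Hedged example: fs_event_callback (declared in hdfspp/events.h) is assumed to be
+// callable with (event, cluster, value) and to return an event_response:
+//   LibhdfsEvents events;
+//   events.set_fs_callback([](const char *event, const char *cluster, int64_t value) {
+//     (void)event; (void)cluster; (void)value;
+//     return event_response::make_ok();
+//   });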
+
+}
+#endif

+ 100 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/locks.cc

@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/locks.h"
+
+#include <mutex>
+
+
+namespace hdfs {
+
+LockGuard::LockGuard(Mutex *m) : _mtx(m) {
+  if(!m) {
+    throw LockFailure("LockGuard passed invalid (null) Mutex pointer");
+  }
+  _mtx->lock();
+}
+
+LockGuard::~LockGuard() {
+  if(_mtx) {
+    _mtx->unlock();
+  }
+}
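+
+// Usage sketch: LockGuard provides scoped locking over any hdfs::Mutex, e.g. the
+// process-wide gssapi mutex managed below:
+//   {
+//     LockGuard guard(LockManager::getGssapiMutex());
+//     // ... calls into a non-thread-safe gssapi/SASL implementation ...
+//   }  // unlocked here when guard goes out of scope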
+
+
+// Basic mutexes to use as default.  Just a wrapper around C++11 std::mutex.
+class DefaultMutex : public Mutex {
+ public:
+  DefaultMutex() {}
+
+  void lock() override {
+    // Could throw in here if the implementation couldn't lock for some reason.
+    _mtx.lock();
+  }
+
+  void unlock() override {
+    _mtx.unlock();
+  }
+
+  std::string str() override {
+    return "DefaultMutex";
+  }
+ private:
+  std::mutex _mtx;
+};
+
+DefaultMutex defaultTestMutex;
+DefaultMutex defaultGssapiMutex;
+
+// LockManager static var instantiation
+Mutex *LockManager::TEST_default_mutex = &defaultTestMutex;
+Mutex *LockManager::gssapiMtx = &defaultGssapiMutex;
+std::mutex LockManager::_state_lock;
+bool LockManager::_finalized = false;
+
+bool LockManager::InitLocks(Mutex *gssapi) {
+  std::lock_guard<std::mutex> guard(_state_lock);
+
+  // You get one shot to set this - swapping the locks
+  // out while in use is risky.  It can still be done by
+  // using the Mutex as a proxy object if one understands
+  // the implied risk of doing so.
+  if(_finalized)
+    return false;
+
+  gssapiMtx = gssapi;
+  _finalized = true;
+  return true;
+}
+
+Mutex *LockManager::getGssapiMutex() {
+  std::lock_guard<std::mutex> guard(_state_lock);
+  return gssapiMtx;
+}
+
+Mutex *LockManager::TEST_get_default_mutex() {
+  return TEST_default_mutex;
+}
+
+void LockManager::TEST_reset_manager() {
+  _finalized = false;
+  // user still responsible for cleanup
+  gssapiMtx = &defaultGssapiMutex;
+}
+
+} // end namespace hdfs

+ 227 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.cc

@@ -0,0 +1,227 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "logging.h"
+
+#include <ctime>
+#include <cstring>
+#include <thread>
+#include <iostream>
+#include <sstream>
+
+namespace hdfs
+{
+
+LogManager::LogManager() {}
+std::unique_ptr<LoggerInterface> LogManager::logger_impl_(new StderrLogger());
+std::mutex LogManager::impl_lock_;
+uint32_t LogManager::component_mask_ = 0xFFFFFFFF;
+uint32_t LogManager::level_threshold_ = kWarning;
+
+void LogManager::DisableLogForComponent(LogSourceComponent c) {
+  // AND with all bits other than one we want to unset
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  component_mask_ &= ~c;
+}
+
+void LogManager::EnableLogForComponent(LogSourceComponent c) {
+  // OR with bit to set
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  component_mask_ |= c;
+}
+
+void LogManager::SetLogLevel(LogLevel level) {
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  level_threshold_ = level;
+}
+
+void LogManager::Write(const LogMessage& msg) {
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  if(logger_impl_)
+    logger_impl_->Write(msg);
+}
+
+void LogManager::SetLoggerImplementation(std::unique_ptr<LoggerInterface> impl) {
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  logger_impl_.reset(impl.release());
+}
+
+
+/**
+ *  Simple plugin to dump logs to stderr
+ **/
+void StderrLogger::Write(const LogMessage& msg) {
+  std::stringstream formatted;
+
+  if(show_level_)
+    formatted << msg.level_string();
+
+  if(show_component_)
+    formatted << msg.component_string();
+
+  if(show_timestamp_) {
+    time_t current_time = std::time(nullptr);
+    char timestr[128];
+    memset(timestr, 0, 128);
+    int res = std::strftime(timestr, 128, "%a %b %e %H:%M:%S %Y", std::localtime(&current_time));
+    if(res > 0) {
+      formatted << '[' << (const char*)timestr << ']';
+    } else {
+      formatted << "[Error formatting timestamp]";
+    }
+  }
+
+  if(show_thread_) {
+    formatted << "[Thread id = " << std::this_thread::get_id() << ']';
+  }
+
+  if(show_file_) {
+    //  __FILE__ contains absolute path, which is giant if doing a build inside the
+    //  Hadoop tree.  Trim down to relative to libhdfspp/
+    std::string abs_path(msg.file_name());
+    size_t rel_path_idx = abs_path.find("libhdfspp/");
+    //  Default to whole string if library is being built in an odd way
+    if(rel_path_idx == std::string::npos)
+      rel_path_idx = 0;
+
+    formatted << '[' << (const char*)&abs_path[rel_path_idx] << ":" << msg.file_line() << ']';
+  }
+
+  std::cerr << formatted.str() << "    " << msg.MsgString() << std::endl;
+}
+
+void StderrLogger::set_show_timestamp(bool show) {
+  show_timestamp_ = show;
+}
+void StderrLogger::set_show_level(bool show) {
+  show_level_ = show;
+}
+void StderrLogger::set_show_thread(bool show) {
+  show_thread_ = show;
+}
+void StderrLogger::set_show_component(bool show) {
+  show_component_ = show;
+}
+
+
+LogMessage::~LogMessage() {
+  LogManager::Write(*this);
+}
+
+LogMessage& LogMessage::operator<<(const std::string *str) {
+  if(str)
+    msg_buffer_ << *str;
+  else
+    msg_buffer_ << "<nullptr>";
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(const std::string& str) {
+  msg_buffer_ << str;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(const ::asio::ip::tcp::endpoint& endpoint) {
+  msg_buffer_ << endpoint;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(const char *str) {
+  if(str)
+    msg_buffer_ << str;
+  else
+    msg_buffer_ << "<nullptr>";
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(bool val) {
+  if(val)
+    msg_buffer_ << "true";
+  else
+    msg_buffer_ << "false";
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(int32_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(uint32_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(int64_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(uint64_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(void *ptr) {
+  msg_buffer_ << ptr;
+  return *this;
+}
+
+
+LogMessage& LogMessage::operator<<(const std::thread::id& tid) {
+  msg_buffer_ << tid;
+  return *this;
+}
+
+std::string LogMessage::MsgString() const {
+  return msg_buffer_.str();
+}
+
+const char * kLevelStrings[5] = {
+  "[TRACE ]",
+  "[DEBUG ]",
+  "[INFO  ]",
+  "[WARN  ]",
+  "[ERROR ]"
+};
+
+const char * LogMessage::level_string() const {
+  return kLevelStrings[level_];
+}
+
+const char * kComponentStrings[6] = {
+  "[Unknown       ]",
+  "[RPC           ]",
+  "[BlockReader   ]",
+  "[FileHandle    ]",
+  "[FileSystem    ]",
+  "[Async Runtime ]",
+};
+
+const char * LogMessage::component_string() const {
+  switch(component_) {
+    case kRPC: return kComponentStrings[1];
+    case kBlockReader: return kComponentStrings[2];
+    case kFileHandle: return kComponentStrings[3];
+    case kFileSystem: return kComponentStrings[4];
+    case kAsyncRuntime: return kComponentStrings[5];
+    default: return kComponentStrings[0];
+  }
+}
+
+}

+ 217 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.h

@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_COMMON_LOGGING_H_
+#define LIB_COMMON_LOGGING_H_
+
+#include <asio/ip/tcp.hpp>
+
+#include "hdfspp/log.h"
+
+#include <iostream>
+#include <sstream>
+#include <mutex>
+#include <memory>
+#include <thread>
+
+namespace hdfs {
+
+/**
+ *  Logging mechanism to provide lightweight logging to stderr as well as
+ *  a callback mechanism that allows C clients and larger third party libs
+ *  to handle logging.  When adding a new log message to the
+ *  library use the macros defined below (LOG_TRACE..LOG_ERROR) rather than
+ *  using the LogMessage and LogManager objects directly.
+ **/
+
+enum LogLevel {
+  kTrace     = 0,
+  kDebug     = 1,
+  kInfo      = 2,
+  kWarning   = 3,
+  kError     = 4,
+};
+
+enum LogSourceComponent {
+  kUnknown      = 1 << 0,
+  kRPC          = 1 << 1,
+  kBlockReader  = 1 << 2,
+  kFileHandle   = 1 << 3,
+  kFileSystem   = 1 << 4,
+  kAsyncRuntime = 1 << 5,
+};
+
+#define LOG_TRACE(C, MSG) do { \
+if(LogManager::ShouldLog(kTrace,C)) { \
+  LogMessage(kTrace, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+
+#define LOG_DEBUG(C, MSG) do { \
+if(LogManager::ShouldLog(kDebug,C)) { \
+  LogMessage(kDebug, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+#define LOG_INFO(C, MSG) do { \
+if(LogManager::ShouldLog(kInfo,C)) { \
+  LogMessage(kInfo, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+#define LOG_WARN(C, MSG) do { \
+if(LogManager::ShouldLog(kWarning,C)) { \
+  LogMessage(kWarning, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+#define LOG_ERROR(C, MSG) do { \
+if(LogManager::ShouldLog(kError,C)) { \
+  LogMessage(kError, __FILE__, __LINE__, C) MSG; \
+}} while (0);
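+
+// Example usage of the macros above (the variable endpoint_count is illustrative):
+//   LogManager::SetLogLevel(kDebug);
+//   LogManager::EnableLogForComponent(kRPC);
+//   LOG_DEBUG(kRPC, << "connected to " << endpoint_count << " endpoints");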
+
+
+class LogMessage;
+
+class LoggerInterface {
+ public:
+  LoggerInterface() {};
+  virtual ~LoggerInterface() {};
+
+  /**
+   *  User defined handling messages, common case would be printing somewhere.
+   **/
+  virtual void Write(const LogMessage& msg) = 0;
+};
+
+/**
+ *  StderrLogger unsurprisingly dumps messages to stderr.
+ *  This is the default logger if nothing else is explicitly set.
+ **/
+class StderrLogger : public LoggerInterface {
+ public:
+  StderrLogger() : show_timestamp_(true), show_level_(true),
+                   show_thread_(true), show_component_(true),
+                   show_file_(true) {}
+  void Write(const LogMessage& msg);
+  void set_show_timestamp(bool show);
+  void set_show_level(bool show);
+  void set_show_thread(bool show);
+  void set_show_component(bool show);
+ private:
+  bool show_timestamp_;
+  bool show_level_;
+  bool show_thread_;
+  bool show_component_;
+  bool show_file_;
+};
+
+
+/**
+ *  LogManager provides a thread safe static interface to the underlying
+ *  logger implementation.
+ **/
+class LogManager {
+ friend class LogMessage;
+ public:
+  //  allow easy inlining
+  static bool ShouldLog(LogLevel level, LogSourceComponent source) {
+    std::lock_guard<std::mutex> impl_lock(impl_lock_);
+    if(level < level_threshold_)
+      return false;
+    if(!(source & component_mask_))
+      return false;
+    return true;
+  }
+  static void Write(const LogMessage & msg);
+  static void EnableLogForComponent(LogSourceComponent c);
+  static void DisableLogForComponent(LogSourceComponent c);
+  static void SetLogLevel(LogLevel level);
+  static void SetLoggerImplementation(std::unique_ptr<LoggerInterface> impl);
+
+ private:
+  // don't create instances of this
+  LogManager();
+  // synchronize all unsafe plugin calls
+  static std::mutex impl_lock_;
+  static std::unique_ptr<LoggerInterface> logger_impl_;
+  // component and level masking
+  static uint32_t component_mask_;
+  static uint32_t level_threshold_;
+};
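+
+// Hedged sketch of plugging in a custom logger (NullLogger is hypothetical):
+//   class NullLogger : public LoggerInterface {
+//    public:
+//     void Write(const LogMessage& msg) override { (void)msg; }
+//   };
+//   LogManager::SetLoggerImplementation(
+//       std::unique_ptr<LoggerInterface>(new NullLogger()));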
+
+/**
+ *  LogMessage contains message text, along with other metadata about the message.
+ *  Note:  For performance reasons a set of macros (see top of file) is used to
+ *  create these inside of an if block.  Do not instantiate these directly; doing
+ *  so will cause the message to be unconditionally logged.  This minor inconvenience
+ *  gives us a ~20% performance increase in the (common) case where few messages
+ *  are worth logging; std::stringstream is expensive to construct.
+ **/
+class LogMessage {
+ friend class LogManager;
+ public:
+  LogMessage(const LogLevel &l, const char *file, int line,
+             LogSourceComponent component = kUnknown) :
+             level_(l), component_(component), origin_file_(file), origin_line_(line){}
+
+  ~LogMessage();
+
+  const char *level_string() const;
+  const char *component_string() const;
+  LogLevel level() const {return level_; }
+  LogSourceComponent component() const {return component_; }
+  int file_line() const {return origin_line_; }
+  const char * file_name() const {return origin_file_; }
+
+  //print as-is, indicates when a nullptr was passed in
+  LogMessage& operator<<(const char *);
+  LogMessage& operator<<(const std::string*);
+  LogMessage& operator<<(const std::string&);
+
+  //convert to a string "true"/"false"
+  LogMessage& operator<<(bool);
+
+  //integral types
+  LogMessage& operator<<(int32_t);
+  LogMessage& operator<<(uint32_t);
+  LogMessage& operator<<(int64_t);
+  LogMessage& operator<<(uint64_t);
+
+  //print address as hex
+  LogMessage& operator<<(void *);
+
+  //asio types
+  LogMessage& operator<<(const ::asio::ip::tcp::endpoint& endpoint);
+
+  //thread and mutex types
+  LogMessage& operator<<(const std::thread::id& tid);
+
+
+  std::string MsgString() const;
+
+ private:
+  LogLevel level_;
+  LogSourceComponent component_;
+  const char *origin_file_;
+  const int origin_line_;
+  std::stringstream msg_buffer_;
+};
+
+}
+
+#endif

+ 178 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.cc

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "namenode_info.h"
+
+#include "common/util.h"
+#include "common/logging.h"
+
+#include <sstream>
+#include <utility>
+#include <future>
+#include <memory>
+
+namespace hdfs {
+
+ResolvedNamenodeInfo& ResolvedNamenodeInfo::operator=(const NamenodeInfo &info) {
+  nameservice = info.nameservice;
+  name = info.name;
+  uri = info.uri;
+  return *this;
+}
+
+
+
+std::string ResolvedNamenodeInfo::str() const {
+  std::stringstream ss;
+  ss << "ResolvedNamenodeInfo {nameservice: " << nameservice << ", name: " << name << ", uri: " << uri.str();
+  ss << ", host: " << uri.get_host();
+
+  if(uri.has_port())
+    ss << ", port: " << uri.get_port();
+  else
+    ss << ", invalid port (uninitialized)";
+
+  ss << ", scheme: " << uri.get_scheme();
+
+  ss << " [";
+  for(unsigned int i=0;i<endpoints.size();i++)
+    ss << endpoints[i] << " ";
+  ss << "] }";
+
+  return ss.str();
+}
+
+
+bool ResolveInPlace(::asio::io_service *ioservice, ResolvedNamenodeInfo &info) {
+  // this isn't very memory friendly, but if it needs to be called often there are bigger issues at hand
+  info.endpoints.clear();
+  std::vector<ResolvedNamenodeInfo> resolved = BulkResolve(ioservice, {info});
+  if(resolved.size() != 1)
+    return false;
+
+  info.endpoints = resolved[0].endpoints;
+  if(info.endpoints.size() == 0)
+    return false;
+  return true;
+}
+
+typedef std::vector<asio::ip::tcp::endpoint> endpoint_vector;
+
+// RAII wrapper
+class ScopedResolver {
+ private:
+  ::asio::io_service *io_service_;
+  std::string host_;
+  std::string port_;
+  ::asio::ip::tcp::resolver::query query_;
+  ::asio::ip::tcp::resolver resolver_;
+  endpoint_vector endpoints_;
+
+  // Caller blocks on access if resolution isn't finished
+  std::shared_ptr<std::promise<Status>> result_status_;
+ public:
+  ScopedResolver(::asio::io_service *service, const std::string &host, const std::string &port) :
+        io_service_(service), host_(host), port_(port), query_(host, port), resolver_(*io_service_)
+  {
+    if(!io_service_)
+      LOG_ERROR(kAsyncRuntime, << "ScopedResolver@" << this << " passed nullptr to io_service");
+  }
+
+  ~ScopedResolver() {
+    resolver_.cancel();
+  }
+
+  bool BeginAsyncResolve() {
+    // result_status_ would only exist if this was previously called.  Invalid state.
+    if(result_status_) {
+      LOG_ERROR(kAsyncRuntime, << "ScopedResolver@" << this << "::BeginAsyncResolve invalid call: may only be called once per instance");
+      return false;
+    } else if(!io_service_) {
+      LOG_ERROR(kAsyncRuntime, << "ScopedResolver@" << this << "::BeginAsyncResolve invalid call: null io_service");
+      return false;
+    }
+
+    // Now set up the promise, set it in async_resolve's callback
+    result_status_ = std::make_shared<std::promise<Status>>();
+    std::shared_ptr<std::promise<Status>> shared_result = result_status_;
+
+    // Callback to pull a copy of endpoints out of resolver and set promise
+    auto callback = [this, shared_result](const asio::error_code &ec, ::asio::ip::tcp::resolver::iterator out) {
+      if(!ec) {
+        std::copy(out, ::asio::ip::tcp::resolver::iterator(), std::back_inserter(endpoints_));
+      }
+      shared_result->set_value( ToStatus(ec) );
+    };
+    resolver_.async_resolve(query_, callback);
+    return true;
+  }
+
+  Status Join() {
+    if(!result_status_) {
+      std::ostringstream errmsg;
+      errmsg << "ScopedResolver@" << this << "::Join invalid call: promise never set";
+      return Status::InvalidArgument(errmsg.str().c_str());
+    }
+
+    std::future<Status> future_result = result_status_->get_future();
+    Status res = future_result.get();
+    return res;
+  }
+
+  endpoint_vector GetEndpoints() {
+    // Explicitly return by value to decouple lifecycles.
+    return endpoints_;
+  }
+};
+
+std::vector<ResolvedNamenodeInfo> BulkResolve(::asio::io_service *ioservice, const std::vector<NamenodeInfo> &nodes) {
+  std::vector< std::unique_ptr<ScopedResolver> > resolvers;
+  resolvers.reserve(nodes.size());
+
+  std::vector<ResolvedNamenodeInfo> resolved_info;
+  resolved_info.reserve(nodes.size());
+
+  for(unsigned int i=0; i<nodes.size(); i++) {
+    std::string host = nodes[i].get_host();
+    std::string port = nodes[i].get_port();
+
+    resolvers.emplace_back(new ScopedResolver(ioservice, host, port));
+    resolvers[i]->BeginAsyncResolve();
+  }
+
+  // Join all async operations
+  for(unsigned int i=0; i < resolvers.size(); i++) {
+    Status asyncReturnStatus = resolvers[i]->Join();
+
+    ResolvedNamenodeInfo info;
+    info = nodes[i];
+
+    if(asyncReturnStatus.ok()) {
+      // Copy out endpoints if things went well
+      info.endpoints = resolvers[i]->GetEndpoints();
+    } else {
+      LOG_ERROR(kAsyncRuntime, << "Unabled to resolve endpoints for host: " << nodes[i].get_host()
+                                                               << " port: " << nodes[i].get_port());
+    }
+
+    resolved_info.push_back(info);
+  }
+  return resolved_info;
+}
+
+}

+ 49 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.h

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_NAMENODE_INFO_H_
+#define COMMON_HDFS_NAMENODE_INFO_H_
+
+#include <asio.hpp>
+#include <hdfspp/options.h>
+
+#include <string>
+#include <vector>
+
+namespace hdfs {
+
+// Internal representation of namenode info that keeps track
+// of its endpoints.
+struct ResolvedNamenodeInfo : public NamenodeInfo {
+  ResolvedNamenodeInfo& operator=(const NamenodeInfo &info);
+  std::string str() const;
+
+  std::vector<::asio::ip::tcp::endpoint> endpoints;
+};
+
+// Clear endpoints if set and resolve all of them in parallel.
+// Only successful lookups will be placed in the result set.
+std::vector<ResolvedNamenodeInfo> BulkResolve(::asio::io_service *ioservice, const std::vector<NamenodeInfo> &nodes);
+
+// Clear endpoints, if any, and resolve them again
+// Return true if endpoints were resolved
+bool ResolveInPlace(::asio::io_service *ioservice, ResolvedNamenodeInfo &info);
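+
+// Hedged usage sketch: the io_service passed in must already be running (for
+// example via IoServiceImpl worker threads), otherwise the blocking Join inside
+// BulkResolve never completes.  The hostname below is hypothetical.
+//   ResolvedNamenodeInfo info;
+//   info.uri = URI::parse_from_string("hdfs://host1.example.com:8020");
+//   bool ok = ResolveInPlace(&running_io_service, info);  // fills info.endpoints on success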
+
+}
+
+#endif

+ 52 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/new_delete.h

@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_NEW_DELETE_H_
+#define COMMON_HDFS_NEW_DELETE_H_
+
+#include <cstring>
+
+struct mem_struct {
+  size_t mem_size;
+};
+
+#ifndef NDEBUG
+#define MEMCHECKED_CLASS(clazz) \
+static void* operator new(size_t size) { \
+  void* p = ::malloc(size); \
+  return p; \
+} \
+static void* operator new[](size_t size) { \
+  mem_struct* p = (mem_struct*)::malloc(sizeof(mem_struct) + size); \
+  p->mem_size = size; \
+  return (void*)++p; \
+} \
+static void operator delete(void* p) { \
+  ::memset(p, 0, sizeof(clazz)); \
+  ::free(p); \
+} \
+static void operator delete[](void* p) { \
+  mem_struct* header = (mem_struct*)p; \
+  size_t size = (--header)->mem_size; \
+  ::memset(p, 0, size); \
+  ::free(header); \
+}
+#else
+#define MEMCHECKED_CLASS(clazz)
+#endif
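+
+// Usage sketch (Foo is hypothetical): placing the macro inside a class body gives
+// that class checked operator new/delete in debug builds; in NDEBUG builds the
+// macro expands to nothing.
+//   class Foo {
+//    public:
+//     MEMCHECKED_CLASS(Foo)
+//     int value;
+//   };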
+#endif

+ 43 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/optional_wrapper.h

@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_OPTIONAL_WRAPPER_H_
+#define COMMON_OPTIONAL_WRAPPER_H_
+
+#ifdef __clang__
+  #pragma clang diagnostic push
+  #if __has_warning("-Wweak-vtables")
+    #pragma clang diagnostic ignored "-Wweak-vtables"
+  #endif
+  #if __has_warning("-Wreserved-id-macro")
+    #pragma clang diagnostic ignored "-Wreserved-id-macro"
+  #endif
+  #if __has_warning("-Wextra-semi")
+    #pragma clang diagnostic ignored "-Wextra-semi"
+  #endif
+  #define TR2_OPTIONAL_DISABLE_EMULATION_OF_TYPE_TRAITS  //For Clang < 3_4_2
+#endif
+
+#include <optional.hpp>
+
+#ifdef __clang__
+  #undef TR2_OPTIONAL_DISABLE_EMULATION_OF_TYPE_TRAITS  //For Clang < 3_4_2
+  #pragma clang diagnostic pop
+#endif
+
+#endif //COMMON_OPTIONAL_WRAPPER_H_

+ 61 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/options.cc

@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/options.h"
+
+namespace hdfs {
+
+// The linker needs a place to put all of those constants
+const int Options::kDefaultRpcTimeout;
+const int Options::kNoRetry;
+const int Options::kDefaultMaxRpcRetries;
+const int Options::kDefaultRpcRetryDelayMs;
+const unsigned int Options::kDefaultHostExclusionDuration;
+const unsigned int Options::kDefaultFailoverMaxRetries;
+const unsigned int Options::kDefaultFailoverConnectionMaxRetries;
+const long Options::kDefaultBlockSize;
+
+Options::Options() : rpc_timeout(kDefaultRpcTimeout),
+                     rpc_connect_timeout(kDefaultRpcConnectTimeout),
+                     max_rpc_retries(kDefaultMaxRpcRetries),
+                     rpc_retry_delay_ms(kDefaultRpcRetryDelayMs),
+                     host_exclusion_duration(kDefaultHostExclusionDuration),
+                     defaultFS(),
+                     failover_max_retries(kDefaultFailoverMaxRetries),
+                     failover_connection_max_retries(kDefaultFailoverConnectionMaxRetries),
+                     authentication(kDefaultAuthentication),
+                     block_size(kDefaultBlockSize),
+                     io_threads_(kDefaultIoThreads)
+{
+
+}
+
+std::string NamenodeInfo::get_host() const {
+  return uri.get_host();
+}
+
+std::string NamenodeInfo::get_port() const {
+  if(uri.has_port()) {
+    return std::to_string(uri.get_port());
+  }
+  return "-1";
+}
+
+
+
+}

+ 87 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc

@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/retry_policy.h"
+#include "common/logging.h"
+
+#include <sstream>
+
+namespace hdfs {
+
+RetryAction FixedDelayRetryPolicy::ShouldRetry(
+    const Status &s, uint64_t retries, uint64_t failovers,
+    bool isIdempotentOrAtMostOnce) const {
+  LOG_TRACE(kRPC, << "FixedDelayRetryPolicy::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
+  (void)isIdempotentOrAtMostOnce;
+  if (retries + failovers >= max_retries_) {
+    return RetryAction::fail(
+        "Failovers and retries(" + std::to_string(retries + failovers) +
+        ") exceeded maximum retries (" + std::to_string(max_retries_) + "), Status: " +
+        s.ToString());
+  } else {
+    return RetryAction::retry(delay_);
+  }
+}
+
+
+RetryAction NoRetryPolicy::ShouldRetry(
+    const Status &s, uint64_t retries, uint64_t failovers,
+    bool isIdempotentOrAtMostOnce) const {
+  LOG_TRACE(kRPC, << "NoRetryPolicy::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
+  (void)retries;
+  (void)failovers;
+  (void)isIdempotentOrAtMostOnce;
+  return RetryAction::fail("No retry, Status: " + s.ToString());
+}
+
+
+RetryAction FixedDelayWithFailover::ShouldRetry(const Status &s, uint64_t retries,
+    uint64_t failovers,
+    bool isIdempotentOrAtMostOnce) const {
+  (void)isIdempotentOrAtMostOnce;
+  (void)max_failover_conn_retries_;
+  LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
+
+  if(failovers < max_failover_retries_ && (s.code() == ::asio::error::timed_out || s.get_server_exception_type() == Status::kStandbyException) )
+  {
+    // Try connecting to another NN in case this one keeps timing out
+    // Can add the backoff wait specified by dfs.client.failover.sleep.base.millis here
+    if(failovers == 0) {
+      // No delay on first failover if it looks like the NN was bad.
+      return RetryAction::failover(0);
+    } else {
+      return RetryAction::failover(delay_);
+    }
+  }
+
+  if(retries < max_retries_ && failovers < max_failover_retries_) {
+    LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries < max_retries_ && failovers < max_failover_retries_");
+    return RetryAction::retry(delay_);
+  } else if (retries >= max_retries_ && failovers < max_failover_retries_) {
+    LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries >= max_retries_ && failovers < max_failover_retries_");
+    return RetryAction::failover(delay_);
+  } else if (retries <= max_retries_ && failovers == max_failover_retries_) {
+    LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries <= max_retries_ && failovers == max_failover_retries_");
+    // 1 last retry on new connection
+    return RetryAction::retry(delay_);
+  }
+
+  return RetryAction::fail("Retry and failover didn't work, Status: " + s.ToString());
+}
+
+}

+ 160 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.h

@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_RETRY_POLICY_H_
+#define LIB_COMMON_RETRY_POLICY_H_
+
+#include "common/util.h"
+
+#include <string>
+#include <stdint.h>
+
+namespace hdfs {
+
+class RetryAction {
+ public:
+  enum RetryDecision { FAIL, RETRY, FAILOVER_AND_RETRY };
+
+  RetryDecision action;
+  uint64_t delayMillis;
+  std::string reason;
+
+  RetryAction(RetryDecision in_action, uint64_t in_delayMillis,
+              const std::string &in_reason)
+      : action(in_action), delayMillis(in_delayMillis), reason(in_reason) {}
+
+  static RetryAction fail(const std::string &reason) {
+    return RetryAction(FAIL, 0, reason);
+  }
+  static RetryAction retry(uint64_t delay) {
+    return RetryAction(RETRY, delay, "");
+  }
+  static RetryAction failover(uint64_t delay) {
+    return RetryAction(FAILOVER_AND_RETRY, delay, "");
+  }
+
+  std::string decision_str() const {
+    switch(action) {
+      case FAIL: return "FAIL";
+      case RETRY: return "RETRY";
+      case FAILOVER_AND_RETRY: return "FAILOVER_AND_RETRY";
+      default: return "UNDEFINED ACTION";
+    }
+  };
+};
+
+class RetryPolicy {
+ protected:
+  uint64_t delay_;
+  uint64_t max_retries_;
+  RetryPolicy(uint64_t delay, uint64_t max_retries) :
+              delay_(delay), max_retries_(max_retries) {}
+
+ public:
+  RetryPolicy() {};
+
+  virtual ~RetryPolicy() {}
+  /*
+   * If there was an error in communications, responds with the configured
+   * action to take.
+   */
+  virtual RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                                            uint64_t failovers,
+                                            bool isIdempotentOrAtMostOnce) const = 0;
+
+  virtual std::string str() const { return "Base RetryPolicy"; }
+};
+
+
+/*
+ * Overview of how the failover retry policy works:
+ *
+ * 1) Acts the same as FixedDelayRetryPolicy in terms of connection retries against a single NN
+ *    with two differences:
+ *      a) If we have retried more than the maximum number of retries we will failover to the
+ *         other node and reset the retry counter rather than error out.  It will begin the same
+ *         routine on the other node.
+ *      b) If an attempted connection times out and max_failover_conn_retries_ is less than the
+ *         normal number of retries it will failover sooner.  The connection timeout retry limit
+ *         defaults to zero; the idea being that if a node is unresponsive it's better to just
+ *         try the secondary rather than incur the timeout cost multiple times.
+ *
+ * 2) Keeps track of the failover count in the same way that the retry count is tracked.  If failover
+ *    is triggered more than a set number (dfs.client.failover.max.attempts) of times then the operation
+ *    will error out in the same way that a non-HA operation would error if it ran out of retries.
+ *
+ * 3) Failover between namenodes isn't instantaneous so the RPC retry delay is reused to add a small
+ *    delay between failover attempts.  This helps prevent the client from quickly using up all of
+ *    its failover attempts while thrashing between namenodes that are both temporarily marked standby.
+ *    Note: The Java client implements exponential backoff here with a base other than the RPC delay;
+ *    this policy should eventually do the same. For now it does no exponential backoff, and the class
+ *    can be renamed to ExponentialDelayWithFailover once backoff is implemented.
+ */
+class FixedDelayWithFailover : public RetryPolicy {
+ public:
+  FixedDelayWithFailover(uint64_t delay, uint64_t max_retries,
+                         uint64_t max_failover_retries,
+                         uint64_t max_failover_conn_retries)
+      : RetryPolicy(delay, max_retries), max_failover_retries_(max_failover_retries),
+        max_failover_conn_retries_(max_failover_conn_retries) {}
+
+  RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                          uint64_t failovers,
+                          bool isIdempotentOrAtMostOnce) const override;
+
+  std::string str() const override { return "FixedDelayWithFailover"; }
+
+ private:
+  // Attempts to fail over
+  uint64_t max_failover_retries_;
+  // Attempts to fail over if the connection times out, rather than
+  // trying to connect and waiting out the timeout delay failover_retries_
+  // times.
+  uint64_t max_failover_conn_retries_;
+};
+
+
+/*
+ * Returns a fixed delay up to a certain number of retries
+ */
+class FixedDelayRetryPolicy : public RetryPolicy {
+ public:
+  FixedDelayRetryPolicy(uint64_t delay, uint64_t max_retries)
+      : RetryPolicy(delay, max_retries) {}
+
+  RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                          uint64_t failovers,
+                          bool isIdempotentOrAtMostOnce) const override;
+
+  std::string str() const override { return "FixedDelayRetryPolicy"; }
+};
+
+/*
+ * Never retries
+ */
+class NoRetryPolicy : public RetryPolicy {
+ public:
+  NoRetryPolicy() {}
+  RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                          uint64_t failovers,
+                          bool isIdempotentOrAtMostOnce) const override;
+
+  std::string str() const override { return "NoRetryPolicy"; }
+};
+}
+
+#endif
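
For illustration, a minimal, self-contained sketch of the decision logic described in the FixedDelayWithFailover comment above. The names and the exact ordering of checks are assumptions for this sketch; the real logic lives in retry_policy.cc, which is not part of this excerpt.

#include <cstdint>

// Local stand-ins for this sketch only -- NOT the libhdfspp RetryAction/RetryPolicy types.
enum class Decision { kFail, kRetry, kFailoverAndRetry };

struct Outcome {
  Decision decision;
  uint64_t delay_ms;  // how long to wait before the next attempt
};

// retries/failovers are the counts accumulated so far for the current operation.
Outcome decide(bool fatal_error, bool connection_timed_out,
               uint64_t retries, uint64_t failovers,
               uint64_t delay_ms, uint64_t max_retries,
               uint64_t max_failover_retries, uint64_t max_failover_conn_retries) {
  if (fatal_error)                         // e.g. authentication failure: retrying cannot help
    return {Decision::kFail, 0};
  if (failovers >= max_failover_retries)   // out of failover attempts: give up, like a non-HA client out of retries
    return {Decision::kFail, 0};
  // Connection timeouts fail over after max_failover_conn_retries (default 0) attempts
  // instead of paying the timeout cost max_retries times on an unresponsive node.
  if (connection_timed_out && retries >= max_failover_conn_retries)
    return {Decision::kFailoverAndRetry, delay_ms};
  if (retries >= max_retries)              // exhausted retries on this namenode: switch to the other one,
    return {Decision::kFailoverAndRetry, delay_ms};  // reusing the fixed RPC delay between failovers
  return {Decision::kRetry, delay_ms};     // otherwise retry the same namenode after the fixed delay
}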

+ 66 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_authenticator.h

@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_SASL_AUTHENTICATOR_H_
+#define LIB_COMMON_SASL_AUTHENTICATOR_H_
+
+#include "hdfspp/status.h"
+
+namespace hdfs {
+
+class DigestMD5AuthenticatorTest_TestResponse_Test;
+
+/**
+ * A specialized implementation of RFC 2831 for the HDFS
+ * DataTransferProtocol.
+ *
+ * The current implementation lacks the following features:
+ *   * Encoding the username, realm, and password in ISO-8859-1 when
+ * required by the RFC. They are always encoded in UTF-8.
+ *   * Checking whether the challenges from the server are
+ * well-formed.
+ *   * Specifying authzid, digest-uri and maximum buffer size.
+ *   * Supporting QOP other than the auth level.
+ **/
+class DigestMD5Authenticator {
+public:
+  Status EvaluateResponse(const std::string &payload, std::string *result);
+  DigestMD5Authenticator(const std::string &username,
+                         const std::string &password, bool mock_nonce = false);
+
+private:
+  Status GenerateFirstResponse(std::string *result);
+  Status GenerateResponseValue(std::string *response_value);
+  Status ParseFirstChallenge(const std::string &payload);
+
+  static size_t NextToken(const std::string &payload, size_t off,
+                          std::string *tok);
+  void GenerateCNonce();
+  std::string username_;
+  std::string password_;
+  std::string nonce_;
+  std::string cnonce_;
+  std::string realm_;
+  std::string qop_;
+  unsigned nonce_count_;
+
+  const bool TEST_mock_cnonce_;
+  friend class DigestMD5AuthenticatorTest_TestResponse_Test;
+};
+}
+
+#endif
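
A hedged usage sketch of DigestMD5Authenticator: the data-transfer negotiation code feeds each server challenge to EvaluateResponse and sends back the generated response. The challenge string, credentials, and include path below are illustrative assumptions, not values from this commit.

#include "common/sasl_authenticator.h"  // assumed include path within the libhdfspp source tree

#include <iostream>
#include <string>

int main() {
  // Illustrative DIGEST-MD5 challenge with the fields ParseFirstChallenge expects.
  const std::string challenge =
      "realm=\"0\",nonce=\"abcdefgh\",qop=\"auth\",charset=utf-8,algorithm=md5-sess";

  hdfs::DigestMD5Authenticator auth("example_user", "example_password");

  std::string response;
  hdfs::Status status = auth.EvaluateResponse(challenge, &response);
  if (!status.ok()) {
    std::cerr << "SASL negotiation failed: " << status.ToString() << std::endl;
    return 1;
  }
  // "response" now holds the digest-response string (username, nonce, cnonce, nc,
  // digest-uri, response=..., etc.) to send back to the server.
  std::cout << response << std::endl;
  return 0;
}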

+ 240 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_digest_md5.cc

@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "sasl_authenticator.h"
+
+#include "common/util.h"
+
+#include <openssl/rand.h>
+#include <openssl/md5.h>
+
+#include <iomanip>
+#include <map>
+#include <sstream>
+
+namespace hdfs {
+
+static std::string QuoteString(const std::string &src);
+static std::string GetMD5Digest(const std::string &src);
+static std::string BinaryToHex(const std::string &src);
+
+static const char kDigestUri[] = "hdfs/0";
+static const size_t kMaxBufferSize = 65536;
+
+DigestMD5Authenticator::DigestMD5Authenticator(const std::string &username,
+                                               const std::string &password,
+                                               bool mock_nonce)
+    : username_(username), password_(password), nonce_count_(0),
+      TEST_mock_cnonce_(mock_nonce) {}
+
+Status DigestMD5Authenticator::EvaluateResponse(const std::string &payload,
+                                                std::string *result) {
+  Status status = ParseFirstChallenge(payload);
+  if (status.ok()) {
+    status = GenerateFirstResponse(result);
+  }
+  return status;
+}
+
+size_t DigestMD5Authenticator::NextToken(const std::string &payload, size_t off,
+                                         std::string *tok) {
+  tok->clear();
+  if (off >= payload.size()) {
+    return std::string::npos;
+  }
+
+  char c = payload[off];
+  if (c == '=' || c == ',') {
+    *tok = c;
+    return off + 1;
+  }
+
+  int quote_count = 0;
+  for (; off < payload.size(); ++off) {
+    char c = payload[off];
+    if (c == '"') {
+      ++quote_count;
+      if (quote_count == 2) {
+        return off + 1;
+      }
+      continue;
+    }
+
+    if (c == '=') {
+      if (quote_count) {
+        tok->append(&c, 1);
+      } else {
+        break;
+      }
+    } else if (('0' <= c && c <= '9') || ('a' <= c && c <= 'z') ||
+               ('A' <= c && c <= 'Z') || c == '+' || c == '/' || c == '-' ||
+               c == '_' || c == '@') {
+      tok->append(&c, 1);
+    } else {
+      break;
+    }
+  }
+  return off;
+}
+
+void DigestMD5Authenticator::GenerateCNonce() {
+  if (!TEST_mock_cnonce_) {
+    char buf[8] = {0,};
+    RAND_pseudo_bytes(reinterpret_cast<unsigned char *>(buf), sizeof(buf));
+    cnonce_ = Base64Encode(std::string(buf, sizeof(buf)));
+  }
+}
+
+Status DigestMD5Authenticator::ParseFirstChallenge(const std::string &payload) {
+  std::map<std::string, std::string> props;
+  std::string token;
+  enum {
+    kStateLVal,
+    kStateEqual,
+    kStateRVal,
+    kStateCommaOrEnd,
+  };
+
+  int state = kStateLVal;
+
+  std::string lval, rval;
+  size_t off = 0;
+  while (true) {
+    off = NextToken(payload, off, &token);
+    if (off == std::string::npos) {
+      break;
+    }
+
+    switch (state) {
+    case kStateLVal:
+      lval = token;
+      state = kStateEqual;
+      break;
+    case kStateEqual:
+      state = kStateRVal;
+      break;
+    case kStateRVal:
+      rval = token;
+      props[lval] = rval;
+      state = kStateCommaOrEnd;
+      break;
+    case kStateCommaOrEnd:
+      state = kStateLVal;
+      break;
+    }
+  }
+
+  if (props["algorithm"] != "md5-sess" || props["charset"] != "utf-8" ||
+      props.find("nonce") == props.end()) {
+    return Status::Error("Invalid challenge");
+  }
+  realm_ = props["realm"];
+  nonce_ = props["nonce"];
+  qop_ = props["qop"];
+  return Status::OK();
+}
+
+Status DigestMD5Authenticator::GenerateFirstResponse(std::string *result) {
+  // TODO: Support auth-int and auth-conf
+  // Handle cipher
+  if (qop_ != "auth") {
+    return Status::Unimplemented();
+  }
+
+  std::stringstream ss;
+  GenerateCNonce();
+  ss << "charset=utf-8,username=\"" << QuoteString(username_) << "\""
+     << ",authzid=\"" << QuoteString(username_) << "\""
+     << ",nonce=\"" << QuoteString(nonce_) << "\""
+     << ",digest-uri=\"" << kDigestUri << "\""
+     << ",maxbuf=" << kMaxBufferSize << ",cnonce=\"" << cnonce_ << "\"";
+
+  if (realm_.size()) {
+    ss << ",realm=\"" << QuoteString(realm_) << "\"";
+  }
+
+  ss << ",nc=" << std::hex << std::setw(8) << std::setfill('0')
+     << ++nonce_count_;
+  std::string response_value;
+  GenerateResponseValue(&response_value);
+  ss << ",response=" << response_value;
+  *result = ss.str();
+  return result->size() > 4096 ? Status::Error("Response too big")
+                               : Status::OK();
+}
+
+/**
+ * Generate the response value specified in S 2.1.2.1 in RFC2831.
+ **/
+Status
+DigestMD5Authenticator::GenerateResponseValue(std::string *response_value) {
+  std::stringstream begin_a1, a1_ss;
+  std::string a1, a2;
+
+  if (qop_ == "auth") {
+    a2 = std::string("AUTHENTICATE:") + kDigestUri;
+  } else {
+    a2 = std::string("AUTHENTICATE:") + kDigestUri +
+         ":00000000000000000000000000000000";
+  }
+
+  begin_a1 << username_ << ":" << realm_ << ":" << password_;
+  a1_ss << GetMD5Digest(begin_a1.str()) << ":" << nonce_ << ":" << cnonce_
+        << ":" << username_;
+
+  std::stringstream combine_ss;
+  combine_ss << BinaryToHex(GetMD5Digest(a1_ss.str())) << ":" << nonce_ << ":"
+             << std::hex << std::setw(8) << std::setfill('0') << nonce_count_
+             << ":" << cnonce_ << ":" << qop_ << ":"
+             << BinaryToHex(GetMD5Digest(a2));
+  *response_value = BinaryToHex(GetMD5Digest(combine_ss.str()));
+  return Status::OK();
+}
+
+static std::string QuoteString(const std::string &src) {
+  std::string dst;
+  dst.resize(2 * src.size());
+  size_t j = 0;
+  for (size_t i = 0; i < src.size(); ++i) {
+    if (src[i] == '"') {
+      dst[j++] = '\\';
+    }
+    dst[j++] = src[i];
+  }
+  dst.resize(j);
+  return dst;
+}
+
+static std::string GetMD5Digest(const std::string &src) {
+  MD5_CTX ctx;
+  unsigned long long res[2];
+  MD5_Init(&ctx);
+  MD5_Update(&ctx, src.c_str(), src.size());
+  MD5_Final(reinterpret_cast<unsigned char *>(res), &ctx);
+  return std::string(reinterpret_cast<char *>(res), sizeof(res));
+}
+
+static std::string BinaryToHex(const std::string &src) {
+  std::stringstream ss;
+  ss << std::hex << std::setfill('0');
+  for (size_t i = 0; i < src.size(); ++i) {
+    unsigned c = (unsigned)(static_cast<unsigned char>(src[i]));
+    ss << std::setw(2) << c;
+  }
+  return ss.str();
+}
+}
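
For readers cross-checking GenerateResponseValue against the RFC, this is the computation performed above, restated: H is MD5, HEX is the BinaryToHex helper, and ":" is literal concatenation.

// RFC 2831 section 2.1.2.1 as implemented above for qop=auth:
//   A1       = H(username ":" realm ":" password) ":" nonce ":" cnonce ":" authzid
//   A2       = "AUTHENTICATE:" digest-uri
//   response = HEX( H( HEX(H(A1)) ":" nonce ":" nc ":" cnonce ":" qop ":" HEX(H(A2)) ) )
// where authzid is set to the username by GenerateFirstResponse, nc is the 8-digit
// hex nonce count, and digest-uri is the kDigestUri constant ("hdfs/0").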

+ 74 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/statinfo.cc

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hdfspp/statinfo.h>
+#include <sys/stat.h>
+#include <sstream>
+#include <iomanip>
+
+namespace hdfs {
+
+StatInfo::StatInfo()
+  : file_type(0),
+    length(0),
+    permissions(0),
+    modification_time(0),
+    access_time(0),
+    block_replication(0),
+    blocksize(0),
+    fileid(0),
+    children_num(0) {
+}
+
+std::string StatInfo::str() const {
+  char perms[11];
+  perms[0] = file_type == StatInfo::IS_DIR ? 'd' : '-';
+  perms[1] = permissions & S_IRUSR? 'r' : '-';
+  perms[2] = permissions & S_IWUSR? 'w': '-';
+  perms[3] = permissions & S_IXUSR? 'x': '-';
+  perms[4] = permissions & S_IRGRP? 'r' : '-';
+  perms[5] = permissions & S_IWGRP? 'w': '-';
+  perms[6] = permissions & S_IXGRP? 'x': '-';
+  perms[7] = permissions & S_IROTH? 'r' : '-';
+  perms[8] = permissions & S_IWOTH? 'w': '-';
+  perms[9] = permissions & S_IXOTH? 'x': '-';
+  perms[10] = 0;
+
+  // Convert from milliseconds to seconds
+  const int time_field_length = 17;
+  time_t rawtime = modification_time/1000;
+  struct tm * timeinfo;
+  char buffer[time_field_length];
+  timeinfo = localtime(&rawtime);
+
+  strftime(buffer,time_field_length,"%Y-%m-%d %H:%M",timeinfo);
+  buffer[time_field_length-1] = 0;  //null terminator
+  std::string time(buffer);
+
+  std::stringstream ss;
+  ss  << std::left << std::setw(12) << perms
+      << std::left << std::setw(3) << (!block_replication ? "-" : std::to_string(block_replication))
+      << std::left << std::setw(15) << owner
+      << std::left << std::setw(15) << group
+      << std::right << std::setw(5) << length
+      << std::right << std::setw(time_field_length + 2) << time  // formatted modification_time
+      << "  " << full_path;
+  return ss.str();
+}
+
+}
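
A hedged illustration of the ls-style line StatInfo::str() produces. The field values below are made up; in practice the struct is filled from the namenode response, and the printed timestamp depends on the local time zone.

#include <hdfspp/statinfo.h>

#include <sys/stat.h>
#include <iostream>

int main() {
  hdfs::StatInfo si;                       // numeric fields default to zero
  si.file_type = hdfs::StatInfo::IS_DIR;   // directory entry -> leading 'd'
  si.permissions = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;  // 0755
  si.owner = "hdfs";
  si.group = "supergroup";
  si.full_path = "/tmp";
  si.modification_time = 1500000000000ULL; // milliseconds since the epoch

  // Prints something along the lines of (replication shown as "-" because it is zero):
  //   drwxr-xr-x  -  hdfs           supergroup         0  2017-07-14 02:40  /tmp
  std::cout << si.str() << std::endl;
  return 0;
}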

+ 192 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/status.cc

@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/status.h"
+
+#include <cassert>
+#include <sstream>
+#include <cstring>
+#include <map>
+#include <set>
+
+namespace hdfs {
+
+//  Server side exceptions that we capture from the RpcResponseHeaderProto
+const char * kStatusAccessControlException     = "org.apache.hadoop.security.AccessControlException";
+const char * kPathIsNotDirectoryException      = "org.apache.hadoop.fs.PathIsNotDirectoryException";
+const char * kSnapshotException                = "org.apache.hadoop.hdfs.protocol.SnapshotException";
+const char * kStatusStandbyException           = "org.apache.hadoop.ipc.StandbyException";
+const char * kStatusSaslException              = "javax.security.sasl.SaslException";
+const char * kPathNotFoundException            = "org.apache.hadoop.fs.InvalidPathException";
+const char * kPathNotFoundException2           = "java.io.FileNotFoundException";
+const char * kFileAlreadyExistsException       = "org.apache.hadoop.fs.FileAlreadyExistsException";
+const char * kPathIsNotEmptyDirectoryException = "org.apache.hadoop.fs.PathIsNotEmptyDirectoryException";
+
+
+const static std::map<std::string, int> kKnownServerExceptionClasses = {
+                                            {kStatusAccessControlException, Status::kAccessControlException},
+                                            {kPathIsNotDirectoryException, Status::kNotADirectory},
+                                            {kSnapshotException, Status::kSnapshotProtocolException},
+                                            {kStatusStandbyException, Status::kStandbyException},
+                                            {kStatusSaslException, Status::kAuthenticationFailed},
+                                            {kPathNotFoundException, Status::kPathNotFound},
+                                            {kPathNotFoundException2, Status::kPathNotFound},
+                                            {kFileAlreadyExistsException, Status::kFileAlreadyExists},
+                                            {kPathIsNotEmptyDirectoryException, Status::kPathIsNotEmptyDirectory}
+                                        };
+
+// Errors that retry cannot fix. TODO: complete the list.
+const static std::set<int> noRetryExceptions = {
+  Status::kPermissionDenied,
+  Status::kAuthenticationFailed,
+  Status::kAccessControlException
+};
+
+Status::Status(int code, const char *msg1)
+               : code_(code) {
+  if(msg1) {
+    msg_ = msg1;
+  }
+}
+
+Status::Status(int code, const char *exception_class_name, const char *exception_details)
+               : code_(code) {
+  // If we can assure this never gets nullptr args this can be
+  // in the initializer list.
+  if(exception_class_name)
+    exception_class_ = exception_class_name;
+  if(exception_details)
+    msg_ = exception_details;
+
+  std::map<std::string, int>::const_iterator it = kKnownServerExceptionClasses.find(exception_class_);
+  if(it != kKnownServerExceptionClasses.end()) {
+    code_ = it->second;
+  }
+}
+
+
+Status Status::OK() {
+  return Status();
+}
+
+Status Status::InvalidArgument(const char *msg) {
+  return Status(kInvalidArgument, msg);
+}
+
+Status Status::PathNotFound(const char *msg){
+  return Status(kPathNotFound, msg);
+}
+
+Status Status::ResourceUnavailable(const char *msg) {
+  return Status(kResourceUnavailable, msg);
+}
+
+Status Status::PathIsNotDirectory(const char *msg) {
+  return Status(kNotADirectory, msg);
+}
+
+Status Status::Unimplemented() {
+  return Status(kUnimplemented, "");
+}
+
+Status Status::Exception(const char *exception_class_name, const char *error_message) {
+  // Server side exception but can be represented by std::errc codes
+  if (exception_class_name && (strcmp(exception_class_name, kStatusAccessControlException) == 0) )
+    return Status(kPermissionDenied, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kStatusSaslException) == 0))
+    return AuthenticationFailed();
+  else if (exception_class_name && (strcmp(exception_class_name, kPathNotFoundException) == 0))
+    return Status(kPathNotFound, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kPathNotFoundException2) == 0))
+    return Status(kPathNotFound, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kPathIsNotDirectoryException) == 0))
+    return Status(kNotADirectory, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kSnapshotException) == 0))
+    return Status(kInvalidArgument, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kFileAlreadyExistsException) == 0))
+    return Status(kFileAlreadyExists, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kPathIsNotEmptyDirectoryException) == 0))
+    return Status(kPathIsNotEmptyDirectory, error_message);
+  else
+    return Status(kException, exception_class_name, error_message);
+}
+
+Status Status::Error(const char *error_message) {
+  return Exception("Exception", error_message);
+}
+
+Status Status::AuthenticationFailed() {
+  return Status::AuthenticationFailed(nullptr);
+}
+
+Status Status::AuthenticationFailed(const char *msg) {
+  std::string formatted = "AuthenticationFailed";
+  if(msg) {
+    formatted += ": ";
+    formatted += msg;
+  }
+  return Status(kAuthenticationFailed, formatted.c_str());
+}
+
+Status Status::AuthorizationFailed() {
+  return Status::AuthorizationFailed(nullptr);
+}
+
+Status Status::AuthorizationFailed(const char *msg) {
+  std::string formatted = "AuthorizationFailed";
+  if(msg) {
+    formatted += ": ";
+    formatted += msg;
+  }
+  return Status(kPermissionDenied, formatted.c_str());
+}
+
+Status Status::Canceled() {
+  return Status(kOperationCanceled, "Operation canceled");
+}
+
+Status Status::InvalidOffset(const char *msg){
+  return Status(kInvalidOffset, msg);
+}
+
+std::string Status::ToString() const {
+  if (code_ == kOk) {
+    return "OK";
+  }
+  std::stringstream ss;
+  if(!exception_class_.empty()) {
+    ss << exception_class_ << ":";
+  }
+  ss << msg_;
+  return ss.str();
+}
+
+bool Status::notWorthRetry() const {
+  return noRetryExceptions.find(code_) != noRetryExceptions.end();
+}
+
+Status Status::MutexError(const char *msg) {
+  std::string formatted = "MutexError";
+  if(msg) {
+    formatted += ": ";
+    formatted += msg;
+  }
+  return Status(kBusy/*try_lock failure errno*/, formatted.c_str());
+}
+
+}
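
A hedged sketch of how the mapping above behaves at a call site: the RPC layer hands the exception class name from RpcResponseHeaderProto to Status::Exception, and retry logic consults notWorthRetry(). The exception messages here are examples, not captured output.

#include "hdfspp/status.h"

#include <iostream>

int main() {
  // A StandbyException is not in the Exception() if-chain, so it reaches the Status
  // constructor and is mapped through kKnownServerExceptionClasses to
  // kStandbyException; retrying after a failover can still succeed.
  hdfs::Status standby = hdfs::Status::Exception(
      "org.apache.hadoop.ipc.StandbyException",
      "Operation category READ is not supported in state standby");
  std::cout << standby.ToString() << std::endl;
  std::cout << "worth retrying: " << std::boolalpha << !standby.notWorthRetry() << std::endl;

  // An AccessControlException is short-circuited to kPermissionDenied, which is
  // in the noRetryExceptions set, so retrying is pointless.
  hdfs::Status denied = hdfs::Status::Exception(
      "org.apache.hadoop.security.AccessControlException",
      "Permission denied: user=nobody, access=WRITE, inode=/tmp");
  std::cout << denied.ToString() << std::endl;
  std::cout << "worth retrying: " << !denied.notWorthRetry() << std::endl;
  return 0;
}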

+ 454 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/uri.cc

@@ -0,0 +1,454 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <hdfspp/uri.h>
+
+#include <uriparser2/uriparser/Uri.h>
+
+#include <string.h>
+#include <sstream>
+#include <cstdlib>
+#include <cassert>
+#include <limits>
+
+namespace hdfs
+{
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//   Internal utilities
+//
+///////////////////////////////////////////////////////////////////////////////
+
+const char kReserved[] = ":/?#[]@%+";
+
+std::string URI::encode(const std::string & decoded)
+{
+  bool hasCharactersToEncode = false;
+  for (auto c : decoded)
+  {
+    if (isalnum(c) || (strchr(kReserved, c) == NULL))
+    {
+      continue;
+    }
+    else
+    {
+      hasCharactersToEncode = true;
+      break;
+    }
+  }
+
+  if (hasCharactersToEncode)
+  {
+    std::vector<char> buf(decoded.size() * 3 + 1);
+    uriEscapeA(decoded.c_str(), &buf[0], true, URI_BR_DONT_TOUCH);
+    return std::string(&buf[0]);
+  }
+  else
+  {
+    return decoded;
+  }
+}
+
+std::string URI::decode(const std::string & encoded)
+{
+  bool hasCharactersToDecode = false;
+  for (auto c : encoded)
+  {
+    switch (c)
+    {
+    case '%':
+    case '+':
+      hasCharactersToDecode = true;
+      break;
+    default:
+      continue;
+    }
+  }
+
+  if (hasCharactersToDecode)
+  {
+    std::vector<char> buf(encoded.size() + 1);
+    strncpy(&buf[0], encoded.c_str(), buf.size());
+    uriUnescapeInPlaceExA(&buf[0], true, URI_BR_DONT_TOUCH);
+    return std::string(&buf[0]);
+  }
+  else
+  {
+    return encoded;
+  }
+}
+
+std::vector<std::string> split(const std::string input, char separator)
+{
+  std::vector<std::string> result;
+
+  if (!input.empty())
+  {
+    const char * remaining = input.c_str();
+    if (*remaining == '/')
+      remaining++;
+
+    const char * next_end = strchr(remaining, separator);
+    while (next_end) {
+      int len = next_end - remaining;
+      if (len)
+        result.push_back(std::string(remaining, len));
+      else
+        result.push_back("");
+      remaining = next_end + 1;
+      next_end = strchr(remaining, separator);
+    }
+    result.push_back(std::string(remaining));
+  }
+
+  return result;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//   Parsing
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+std::string copy_range(const UriTextRangeA *r) {
+  const int size = r->afterLast - r->first;
+  if (size) {
+      return std::string(r->first, size);
+  }
+  return "";
+}
+
+bool parse_int(const UriTextRangeA *r, int32_t& result)
+{
+  std::string int_str = copy_range(r);
+  if(!int_str.empty()) {
+    errno = 0;
+    unsigned long val = ::strtoul(int_str.c_str(), nullptr, 10);
+    if(errno == 0 && val <= std::numeric_limits<uint16_t>::max()) {
+      result = val;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+
+std::vector<std::string> copy_path(const UriPathSegmentA *ps) {
+  std::vector<std::string> result;
+  if (nullptr == ps)
+    return result;
+
+  for (; ps != 0; ps = ps->next) {
+    result.push_back(copy_range(&ps->text));
+  }
+
+  return result;
+}
+
+void parse_user_info(const UriTextRangeA *r, std::string * user, std::string * pass) {
+  // Output parameters
+  assert(user);
+  assert(pass);
+
+  std::string user_and_password = copy_range(r);
+  if (!user_and_password.empty()) {
+    const char * begin = user_and_password.c_str();
+    const char * colon_loc = strchr(begin, ':');
+    if (colon_loc) {
+      *user = std::string(begin, colon_loc - begin);
+      *pass = colon_loc + 1;
+    } else {
+      *user = user_and_password;
+    }
+  }
+}
+
+
+std::vector<URI::Query> parse_queries(const char *first, const char * afterLast) {
+    std::vector<URI::Query>  result;
+    UriQueryListA * query;
+    int count;
+    int dissect_result = uriDissectQueryMallocExA(&query, &count, first, afterLast, false, URI_BR_DONT_TOUCH);
+    if (URI_SUCCESS == dissect_result) {
+      for (auto ps = query; ps != nullptr; ps = ps->next) {
+        std::string key = ps->key ? URI::encode(ps->key) : "";
+        std::string value = ps->value ? URI::encode(ps->value) : "";
+        result.emplace_back(key, value);
+      }
+      uriFreeQueryListA(query);
+    }
+
+  return result;
+}
+
+// Parse a string into a URI.  Throw a hdfs::uri_parse_error if URI is malformed.
+URI URI::parse_from_string(const std::string &str)
+{
+  URI ret;
+  bool ok = true;
+
+  UriParserStateA state;
+  memset(&state, 0, sizeof(state));
+  UriUriA uu;
+
+  state.uri = &uu;
+  int parseResult = uriParseUriA(&state, str.c_str());
+  ok &= (parseResult == URI_SUCCESS);
+
+  if (ok) {
+    ret.scheme = copy_range(&uu.scheme);
+    ret.host = copy_range(&uu.hostText);
+    ok &= parse_int(&uu.portText, ret._port);
+    ret.path = copy_path(uu.pathHead);
+    ret.queries = parse_queries(uu.query.first, uu.query.afterLast);
+    ret.fragment = copy_range(&uu.fragment);
+    parse_user_info(&uu.userInfo, &ret.user, &ret.pass);
+  }
+  // Free the parser-owned members exactly once, whether or not parsing succeeded.
+  uriFreeUriMembersA(&uu);
+
+  if (ok) {
+    return ret;
+  } else {
+    throw uri_parse_error(str);
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//   Getters and setters
+//
+///////////////////////////////////////////////////////////////////////////////
+
+URI::URI() : _port(-1) {}
+
+URI::Query::Query(const std::string& k, const std::string& v) : key(k), value(v) {}
+
+std::string URI::str(bool encoded_output) const
+{
+  std::stringstream ss;
+  if (!scheme.empty()) ss << from_encoded(encoded_output, scheme) << "://";
+  if (!user.empty() || !pass.empty()) {
+    if (!user.empty()) ss << from_encoded(encoded_output, user);
+    if (!pass.empty()) ss << ":" << from_encoded(encoded_output, pass);
+    ss << "@";
+  }
+  if (has_authority()) ss << build_authority(encoded_output);
+  if (!path.empty()) ss << get_path(encoded_output);
+  if (!queries.empty()) ss << "?" << get_query(encoded_output);
+  if (!fragment.empty()) ss << "#" << from_encoded(encoded_output, fragment);
+
+  return ss.str();
+}
+
+bool URI::has_authority() const
+{
+  return (!host.empty()) || (has_port());
+}
+
+std::string URI::build_authority(bool encoded_output) const
+{
+  std::stringstream ss;
+  ss << URI::from_encoded(encoded_output, host);
+  if (has_port())
+  {
+    ss << ":" << _port;
+  }
+  return ss.str();
+}
+
+std::string URI::get_scheme(bool encoded_output) const {
+  return from_encoded(encoded_output,scheme);
+}
+
+void URI::set_scheme(const std::string &s, bool encoded_input) {
+  scheme = to_encoded(encoded_input,s);
+}
+
+std::string URI::get_host(bool encoded_output) const {
+  return from_encoded(encoded_output,host);
+}
+
+void URI::set_host(const std::string& h, bool encoded_input) {
+  host = to_encoded(encoded_input,h);
+}
+
+bool URI::has_port() const {
+  return _port != -1;
+}
+
+uint16_t URI::get_port() const {
+  return (uint16_t)_port;
+}
+
+uint16_t URI::get_port_or_default(uint16_t val) const {
+  return has_port() ? (uint16_t)_port : val;
+}
+
+void URI::set_port(uint16_t p)
+{
+  _port = (int32_t)p & 0xFFFF;
+}
+
+void URI::clear_port()
+{
+  _port = -1;
+}
+
+std::string URI::get_path(bool encoded_output) const
+{
+  std::ostringstream out;
+  for (const std::string& s: path) {
+    out << "/" << from_encoded(encoded_output, s);
+  }
+  return out.str();
+}
+
+std::vector<std::string> URI::get_path_elements(bool encoded_output) const
+{
+  std::vector<std::string> result;
+  for (const std::string& path_elem: path) {
+    result.push_back(from_encoded(encoded_output, path_elem));
+  }
+
+  return result;
+}
+
+void URI::parse_path(bool input_encoded, const std::string &input_path)
+{
+  std::vector<std::string> split_path = split(input_path, '/');
+  for (const std::string& s: split_path) {
+    path.push_back(to_encoded(input_encoded, s));
+  }
+}
+
+// Mostly copied and modified from uriparser2.c
+
+void URI::set_path(const std::string &p, bool encoded_input) {
+  parse_path(encoded_input, p);
+}
+
+void URI::add_path(const std::string &p, bool encoded_input)
+{
+  path.push_back(to_encoded(encoded_input, p));
+}
+
+std::string URI::get_query(bool encoded_output) const {
+  bool first = true;
+  std::stringstream ss;
+  for (const Query& q: queries) {
+    if (!first) {
+      ss << "&";
+    }
+    ss << from_encoded(encoded_output, q.key) << "=" << from_encoded(encoded_output, q.value);
+    first = false;
+  }
+
+  return ss.str();
+}
+
+std::vector<URI::Query> URI::get_query_elements(bool encoded_output) const
+{
+  std::vector<Query> result;
+  for (const Query& q: queries) {
+    std::string key = from_encoded(encoded_output, q.key);
+    std::string value = from_encoded(encoded_output, q.value);
+    result.emplace_back(key, value);
+  }
+
+  return result;
+}
+
+void URI::set_query(const std::string &q) {
+  queries = parse_queries(q.c_str(), q.c_str() + q.size() + 1);
+}
+
+
+void URI::add_query(const std::string &name, const std::string & value, bool encoded_input)
+{
+  queries.emplace_back(to_encoded(encoded_input, name), to_encoded(encoded_input, value));
+}
+
+void URI::remove_query(const std::string &q_name, bool encoded_input)
+{
+  if (queries.empty())
+    return;
+
+  // This is the one place we need to do decoded comparisons
+  std::string decoded_key = encoded_input ? decode(q_name) : q_name;
+
+  for (int i = queries.size() - 1; i >= 0; i--) {
+    if (decode(queries[i].key) == decoded_key) {
+      queries.erase(queries.begin() + i);
+    }
+  }
+}
+
+std::string URI::get_fragment(bool encoded_output) const {
+  return from_encoded(encoded_output, fragment);
+}
+
+void URI::set_fragment(const std::string &f, bool encoded_input) {
+  fragment = to_encoded(encoded_input,f);
+}
+
+std::string URI::from_encoded(bool encoded_output, const std::string & input) {
+  return encoded_output ? input : decode(input);
+}
+
+std::string URI::to_encoded(bool encoded_input, const std::string & input) {
+  return encoded_input ? input : encode(input);
+}
+
+std::string URI::GetDebugString() const {
+  std::stringstream ss;
+  ss << std::endl;
+  ss << "\t" << "uri.str() = \"" << str() << "\"" << std::endl;
+  ss << "\t" << "uri.get_scheme() = \"" << get_scheme() << "\"" << std::endl;
+  ss << "\t" << "uri.get_host() = \"" << get_host() << "\"" << std::endl;
+
+  if(_port == -1)
+    ss << "\t" << "uri.get_port() = invalid (uninitialized)" << std::endl;
+  else
+    ss << "\t" << "uri.get_port() = \"" << _port << "\"" << std::endl;
+
+  ss << "\t" << "uri.get_path() = \"" << get_path() << "\"" << std::endl;
+  ss << "\t" << "uri.get_fragment() = \"" << get_fragment() << "\"" << std::endl;
+
+
+  std::vector<Query> query_elems = get_query_elements();
+
+  if(query_elems.size() > 0)
+    ss << "\t" << "Query elements:" << std::endl;
+
+  for(auto qry = query_elems.begin(); qry != query_elems.end(); qry++) {
+    ss << "\t\t" << qry->key << " -> " << qry->value << std::endl;
+  }
+
+  return ss.str();
+}
+
+} // end namespace hdfs
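
A hedged usage sketch of the URI class above. The URI string is illustrative; parse_from_string throws hdfs::uri_parse_error on malformed input, as noted in the parsing section.

#include <hdfspp/uri.h>

#include <iostream>

int main() {
  try {
    hdfs::URI uri = hdfs::URI::parse_from_string(
        "hdfs://nn1.example.com:8020/user/alice/data?op=OPEN#offset");

    std::cout << uri.get_scheme() << std::endl;               // hdfs
    std::cout << uri.get_host() << std::endl;                 // nn1.example.com
    std::cout << uri.get_port_or_default(8020) << std::endl;  // 8020
    std::cout << uri.get_path() << std::endl;                 // /user/alice/data
    std::cout << uri.get_query() << std::endl;                // op=OPEN
    std::cout << uri.GetDebugString() << std::endl;           // multi-line dump, as built above
  } catch (const hdfs::uri_parse_error &) {
    std::cerr << "malformed URI" << std::endl;
    return 1;
  }
  return 0;
}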

Some files were not shown because too many files changed in this diff