
Revert "Revert "Merge branch 'trunk' into HDFS-7240""
After testing it was confirmed that these changes work as
expected.

This reverts commit 7a542fb3270953fff039c9b1bd7ba7afa35a842c.

Anu Engineer, 7 years ago
Parent
Current commit: b78c94f44c
100 files changed, 11,055 insertions and 107 deletions
  1. 11 21
      BUILDING.txt
  2. 1 0
      dev-support/bin/dist-layout-stitching
  3. 39 0
      dev-support/bin/win-vs-upgrade.cmd
  4. 3 0
      dev-support/docker/Dockerfile
  5. 49 0
      dev-support/win-paths-eg.cmd
  6. 0 15
      hadoop-common-project/hadoop-annotations/pom.xml
  7. 28 0
      hadoop-common-project/hadoop-common/pom.xml
  8. 4 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  9. 24 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java
  10. 2 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java
  11. 2 0
      hadoop-common-project/hadoop-common/src/main/native/native.vcxproj
  12. 33 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c
  13. 31 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
  14. 2 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java
  15. 31 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java
  16. 5 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
  17. 6 0
      hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java
  18. 1 1
      hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  19. 98 18
      hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml
  20. 7 10
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt
  21. 16 6
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.c
  22. 8 3
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.h
  23. 350 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c
  24. 59 11
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_threaded.c
  25. 49 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindCyrusSASL.cmake
  26. 44 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindGSasl.cmake
  27. 297 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt
  28. 161 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CONTRIBUTING.md
  29. 35 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/Doxyfile.in
  30. 5 13
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/mainpage.dox
  31. 20 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/CMakeLists.txt
  32. 20 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/CMakeLists.txt
  33. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/CMakeLists.txt
  34. 121 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/cat.c
  35. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/CMakeLists.txt
  36. 107 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/connect_cancel.c
  37. 24 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/CMakeLists.txt
  38. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/CMakeLists.txt
  39. 89 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/cat.cc
  40. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/CMakeLists.txt
  41. 154 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/connect_cancel.cc
  42. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/CMakeLists.txt
  43. 140 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/find.cc
  44. 27 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/CMakeLists.txt
  45. 122 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/gendirs.cc
  46. 177 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/block_location.h
  47. 68 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/config_parser.h
  48. 48 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/content_summary.h
  49. 141 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/events.h
  50. 48 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/fsinfo.h
  51. 394 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfs_ext.h
  52. 492 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfspp.h
  53. 110 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/locks.h
  54. 60 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/log.h
  55. 136 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/options.h
  56. 59 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/statinfo.h
  57. 111 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/status.h
  58. 137 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h
  59. 25 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/CMakeLists.txt
  60. 19 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/CMakeLists.txt
  61. 21 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/CMakeLists.txt
  62. 2007 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc
  63. 24 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt
  64. 49 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/async_stream.h
  65. 18 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.cc
  66. 90 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.h
  67. 37 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.cc
  68. 40 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.h
  69. 219 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/config_parser.cc
  70. 169 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.cc
  71. 108 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.h
  72. 328 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.cc
  73. 138 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.h
  74. 122 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader_impl.h
  75. 55 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/content_summary.cc
  76. 65 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/asio.h
  77. 137 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/continuation.h
  78. 129 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/protobuf.h
  79. 61 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/fsinfo.cc
  80. 210 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.cc
  81. 70 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.h
  82. 146 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.cc
  83. 79 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.h
  84. 89 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.cc
  85. 59 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.h
  86. 100 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/locks.cc
  87. 227 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.cc
  88. 217 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.h
  89. 178 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.cc
  90. 49 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.h
  91. 52 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/new_delete.h
  92. 43 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/optional_wrapper.h
  93. 61 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/options.cc
  94. 87 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc
  95. 160 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.h
  96. 66 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_authenticator.h
  97. 240 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_digest_md5.cc
  98. 74 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/statinfo.cc
  99. 192 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/status.cc
  100. 454 0
      hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/uri.cc

+ 11 - 21
BUILDING.txt

@@ -11,6 +11,8 @@ Requirements:
 * Zlib devel (if compiling native code)
 * openssl devel (if compiling native hadoop-pipes and to get the best HDFS encryption performance)
 * Linux FUSE (Filesystem in Userspace) version 2.6 or above (if compiling fuse_dfs)
+* Jansson C XML parsing library ( if compiling libwebhdfs )
+* Doxygen ( if compiling libhdfspp and generating the documents )
 * Internet connection for first build (to fetch all Maven and Hadoop dependencies)
 * python (for releasedocs)
 * bats (for shell code testing)
@@ -348,7 +350,7 @@ Requirements:
 * Maven 3.0 or later
 * ProtocolBuffer 2.5.0
 * CMake 3.1 or newer
-* Windows SDK 7.1 or Visual Studio 2010 Professional
+* Visual Studio 2010 Professional or Higher
 * Windows SDK 8.1 (if building CPU rate control for the container executor)
 * zlib headers (if building native code bindings for zlib)
 * Internet connection for first build (to fetch all Maven and Hadoop dependencies)
@@ -359,18 +361,15 @@ Requirements:
 Unix command-line tools are also included with the Windows Git package which
 can be downloaded from http://git-scm.com/downloads
 
-If using Visual Studio, it must be Visual Studio 2010 Professional (not 2012).
+If using Visual Studio, it must be Professional level or higher.
 Do not use Visual Studio Express.  It does not support compiling for 64-bit,
-which is problematic if running a 64-bit system.  The Windows SDK 7.1 is free to
-download here:
-
-http://www.microsoft.com/en-us/download/details.aspx?id=8279
+which is problematic if running a 64-bit system.
 
 The Windows SDK 8.1 is available to download at:
 
 http://msdn.microsoft.com/en-us/windows/bg162891.aspx
 
-Cygwin is neither required nor supported.
+Cygwin is not required.
 
 ----------------------------------------------------------------------------------
 Building:
@@ -378,21 +377,12 @@ Building:
 Keep the source code tree in a short path to avoid running into problems related
 to Windows maximum path length limitation (for example, C:\hdc).
 
-Run builds from a Windows SDK Command Prompt. (Start, All Programs,
-Microsoft Windows SDK v7.1, Windows SDK 7.1 Command Prompt).
-
-JAVA_HOME must be set, and the path must not contain spaces. If the full path
-would contain spaces, then use the Windows short path instead.
-
-You must set the Platform environment variable to either x64 or Win32 depending
-on whether you're running a 64-bit or 32-bit system. Note that this is
-case-sensitive. It must be "Platform", not "PLATFORM" or "platform".
-Environment variables on Windows are usually case-insensitive, but Maven treats
-them as case-sensitive. Failure to set this environment variable correctly will
-cause msbuild to fail while building the native code in hadoop-common.
+There is one support command file located in dev-support called win-paths-eg.cmd.
+It should be copied somewhere convenient and modified to fit your needs.
 
-set Platform=x64 (when building on a 64-bit system)
-set Platform=Win32 (when building on a 32-bit system)
+win-paths-eg.cmd sets up the environment for use. You will need to modify this
+file. It will put all of the required components in the command path,
+configure the bit-ness of the build, and set several optional components.
 
 Several tests require that the user must have the Create Symbolic Links
 privilege.

+ 1 - 0
dev-support/bin/dist-layout-stitching

@@ -128,6 +128,7 @@ run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${VERSION}"
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-client/target/hadoop-hdfs-client-${VERSION}" .
 run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-native-client/target/hadoop-hdfs-native-client-${VERSION}" .
+run copy "${ROOT}/hadoop-hdfs-project/hadoop-hdfs-rbf/target/hadoop-hdfs-rbf-${VERSION}" .
 
 run copy "${ROOT}/hadoop-yarn-project/target/hadoop-yarn-project-${VERSION}" .
 run copy "${ROOT}/hadoop-mapreduce-project/target/hadoop-mapreduce-${VERSION}" .

+ 39 - 0
dev-support/bin/win-vs-upgrade.cmd

@@ -0,0 +1,39 @@
+@ECHO OFF
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM     http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+
+@WHERE devenv
+IF %ERRORLEVEL% NEQ 0 (
+  @ECHO "devenv command was not found. Verify your compiler installation level."
+  EXIT /b 1
+)
+
+@REM Need to save output to a file because for loop will just
+@REM loop forever... :(
+
+SET srcdir=%1
+SET workdir=%2
+
+IF EXIST %srcdir%\Backup (
+  @ECHO "Solution files already upgraded."
+  EXIT /b 0
+)
+
+CD %srcdir%
+DIR /B *.sln > %workdir%\HADOOP-SLN-UPGRADE.TXT
+
+FOR /F %%f IN (%workdir%\HADOOP-SLN-UPGRADE.TXT) DO (
+  devenv %%f /upgrade
+)

+ 3 - 0
dev-support/docker/Dockerfile

@@ -42,6 +42,7 @@ RUN apt-get -q update && apt-get -q install -y \
     apt-utils \
     build-essential \
     bzip2 \
+    clang \
     curl \
     doxygen \
     fuse \
@@ -54,6 +55,7 @@ RUN apt-get -q update && apt-get -q install -y \
     libfuse-dev \
     libprotobuf-dev \
     libprotoc-dev \
+    libsasl2-dev \
     libsnappy-dev \
     libssl-dev \
     libtool \
@@ -71,6 +73,7 @@ RUN apt-get -q update && apt-get -q install -y \
     software-properties-common \
     snappy \
     sudo \
+    valgrind \
     zlib1g-dev
 
 #######

+ 49 - 0
dev-support/win-paths-eg.cmd

@@ -0,0 +1,49 @@
+@ECHO OFF
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM     http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+
+@REM *************************************************
+@REM JDK and these settings MUST MATCH
+@REM
+@REM 64-bit : Platform = x64, VCVARSPLAT = amd64
+@REM
+@REM 32-bit : Platform = Win32, VCVARSPLAT = x86
+@REM
+
+SET Platform=x64
+SET VCVARSPLAT=amd64
+
+@REM ******************
+@REM Forcibly move the Maven local repo
+
+SET MAVEN_OPTS=-Dmaven.repo.local=C:\Tools\m2
+
+@REM *******************************************
+@REM
+@REM Locations of your bits and pieces
+@REM
+@REM NOTE: cmake is assumed to already be on the
+@REM command path
+@REM
+
+SET MAVEN_HOME=C:\Tools\apache-maven-3.5.0
+SET JAVA_HOME=C:\Tools\jdk
+SET MSVS=C:\Program Files (x86)\Microsoft Visual Studio 12.0
+SET PROTO_BIN=C:\Tools\protobuf-2.5.0
+SET GIT_HOME=C:\Program Files\Git
+
+SET PATH=%JAVA_HOME%\bin;%MAVEN_HOME%\bin;%PROTO_BIN%;%GIT_HOME%\bin;%PATH%
+
+CALL "%MSVS%\VC\vcvarsall.bat" %VCVARSPLAT%

+ 0 - 15
hadoop-common-project/hadoop-annotations/pom.xml

@@ -38,21 +38,6 @@
   </dependencies>
 
   <profiles>
-    <profile>
-      <id>jdk1.7</id>
-      <activation>
-        <jdk>1.7</jdk>
-      </activation>
-      <dependencies>
-        <dependency>
-          <groupId>jdk.tools</groupId>
-          <artifactId>jdk.tools</artifactId>
-          <version>1.7</version>
-          <scope>system</scope>
-          <systemPath>${java.home}/../lib/tools.jar</systemPath>
-        </dependency>
-      </dependencies>
-    </profile>
     <profile>
       <id>jdk1.8</id>
       <activation>

+ 28 - 0
hadoop-common-project/hadoop-common/pom.xml

@@ -838,6 +838,20 @@
             <groupId>org.codehaus.mojo</groupId>
             <artifactId>exec-maven-plugin</artifactId>
             <executions>
+              <execution>
+                <id>convert-ms-winutils</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd</executable>
+                  <arguments>
+                    <argument>${basedir}\src\main\winutils</argument>
+                    <argument>${project.build.directory}</argument>
+                  </arguments>
+                </configuration>
+              </execution>
               <execution>
                 <id>compile-ms-winutils</id>
                 <phase>compile</phase>
@@ -857,6 +871,20 @@
                   </arguments>
                 </configuration>
               </execution>
+              <execution>
+                <id>convert-ms-native-dll</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd</executable>
+                  <arguments>
+                    <argument>${basedir}\src\main\native</argument>
+                    <argument>${project.build.directory}</argument>
+                  </arguments>
+                </configuration>
+              </execution>
               <execution>
                 <id>compile-ms-native-dll</id>
                 <phase>compile</phase>

+ 4 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -816,8 +816,11 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
    */
   @SuppressWarnings("unchecked")
   public Configuration(Configuration other) {
-    this.resources = (ArrayList<Resource>) other.resources.clone();
     synchronized(other) {
+      // Make sure we clone a finalized state
+      // Resources like input streams can be processed only once
+      other.getProps();
+      this.resources = (ArrayList<Resource>) other.resources.clone();
       if (other.properties != null) {
         this.properties = (Properties)other.properties.clone();
       }
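
The comment added above points at the real hazard: a Configuration may hold one-shot resources such as an InputStream, and a stream can only be parsed once. The sketch below is not part of this commit; it is an illustrative, single-threaded use of the public Configuration API (class name and keys are arbitrary) showing the failure mode the reordering removes.

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;

public class CloneBeforeParseSketch {
  public static void main(String[] args) {
    InputStream is = new ByteArrayInputStream(
        "<configuration></configuration>".getBytes());
    Configuration original = new Configuration(false);
    // The stream is queued here but parsed lazily, on the first get().
    original.addResource(is);
    // With this change the copy constructor calls other.getProps() first,
    // so the stream is consumed exactly once, inside 'original'.
    Configuration copy = new Configuration(original);
    // Both copies can now be read independently; previously they shared the
    // same unparsed stream, and whichever parsed second could fail with a
    // "stream closed" error.
    copy.get("any.key");
    original.get("any.key");
  }
}

The multi-threaded version of the same scenario is exercised by the testResourceRace test added to TestConfiguration later in this commit.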

+ 24 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java

@@ -53,8 +53,7 @@ public class ThreadUtil {
    * Convenience method that returns a resource as inputstream from the
    * classpath.
    * <p>
-   * It first attempts to use the Thread's context classloader and if not
-   * set it uses the class' classloader.
+   * Uses the Thread's context classloader to load resource.
    *
    * @param resourceName resource to retrieve.
    *
@@ -68,6 +67,27 @@ public class ThreadUtil {
       throw new IOException("Can not read resource file '" + resourceName +
           "' because class loader of the current thread is null");
     }
+    return getResourceAsStream(cl, resourceName);
+  }
+
+  /**
+   * Convenience method that returns a resource as inputstream from the
+   * classpath using given classloader.
+   * <p>
+   *
+   * @param cl ClassLoader to be used to retrieve resource.
+   * @param resourceName resource to retrieve.
+   *
+   * @throws IOException thrown if resource cannot be loaded
+   * @return inputstream with the resource.
+   */
+  public static InputStream getResourceAsStream(ClassLoader cl,
+        String resourceName)
+        throws IOException {
+    if (cl == null) {
+      throw new IOException("Can not read resource file '" + resourceName +
+          "' because given class loader is null");
+    }
     InputStream is = cl.getResourceAsStream(resourceName);
     if (is == null) {
       throw new IOException("Can not read resource file '" +
@@ -75,4 +95,6 @@ public class ThreadUtil {
     }
     return is;
   }
+
+
 }
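
The new overload takes the ClassLoader explicitly instead of trusting the thread's context classloader, which a container or test harness may have replaced with one that cannot see the resource. A hedged usage sketch follows (the class and resource names are hypothetical; the actual in-tree caller is the VersionInfo change just below):

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import org.apache.hadoop.util.ThreadUtil;

public class LoadWithExplicitClassLoader {
  public static Properties loadProps(String resource) throws IOException {
    // Ask the loader that actually contains the resource, rather than
    // whatever context classloader happens to be set on this thread.
    try (InputStream in = ThreadUtil.getResourceAsStream(
        LoadWithExplicitClassLoader.class.getClassLoader(), resource)) {
      Properties props = new Properties();
      props.load(in);
      return props;
    }
  }
}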

+ 2 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java

@@ -43,7 +43,8 @@ public class VersionInfo {
     String versionInfoFile = component + "-version-info.properties";
     InputStream is = null;
     try {
-      is = ThreadUtil.getResourceAsStream(versionInfoFile);
+      is = ThreadUtil.getResourceAsStream(VersionInfo.class.getClassLoader(),
+          versionInfoFile);
       info.load(is);
     } catch (IOException ex) {
       LoggerFactory.getLogger(getClass()).warn("Could not read '" +

+ 2 - 0
hadoop-common-project/hadoop-common/src/main/native/native.vcxproj

@@ -71,6 +71,7 @@
   <PropertyGroup>
     <SnappyLib Condition="Exists('$(CustomSnappyPrefix)\snappy.dll')">$(CustomSnappyPrefix)</SnappyLib>
     <SnappyLib Condition="Exists('$(CustomSnappyPrefix)\lib\snappy.dll') And '$(SnappyLib)' == ''">$(CustomSnappyPrefix)\lib</SnappyLib>
+    <SnappyLib Condition="Exists('$(CustomSnappyPrefix)\bin\snappy.dll') And '$(SnappyLib)' == ''">$(CustomSnappyPrefix)\bin</SnappyLib>
     <SnappyLib Condition="Exists('$(CustomSnappyLib)') And '$(SnappyLib)' == ''">$(CustomSnappyLib)</SnappyLib>
     <SnappyInclude Condition="Exists('$(CustomSnappyPrefix)\snappy.h')">$(CustomSnappyPrefix)</SnappyInclude>
     <SnappyInclude Condition="Exists('$(CustomSnappyPrefix)\include\snappy.h') And '$(SnappyInclude)' == ''">$(CustomSnappyPrefix)\include</SnappyInclude>
@@ -82,6 +83,7 @@
   <PropertyGroup>
     <IsalLib Condition="Exists('$(CustomIsalPrefix)\isa-l.dll')">$(CustomIsalPrefix)</IsalLib>
     <IsalLib Condition="Exists('$(CustomIsalPrefix)\lib\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\lib</IsalLib>
+    <IsalLib Condition="Exists('$(CustomIsalPrefix)\bin\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\bin</IsalLib>
     <IsalLib Condition="Exists('$(CustomIsalLib)') And '$(IsalLib)' == ''">$(CustomIsalLib)</IsalLib>
     <IsalEnabled Condition="'$(IsalLib)' != ''">true</IsalEnabled>
   </PropertyGroup>

+ 33 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c

@@ -27,8 +27,12 @@
 #ifdef UNIX
 static EVP_CIPHER_CTX * (*dlsym_EVP_CIPHER_CTX_new)(void);
 static void (*dlsym_EVP_CIPHER_CTX_free)(EVP_CIPHER_CTX *);
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+static int (*dlsym_EVP_CIPHER_CTX_reset)(EVP_CIPHER_CTX *);
+#else
 static int (*dlsym_EVP_CIPHER_CTX_cleanup)(EVP_CIPHER_CTX *);
 static void (*dlsym_EVP_CIPHER_CTX_init)(EVP_CIPHER_CTX *);
+#endif
 static int (*dlsym_EVP_CIPHER_CTX_set_padding)(EVP_CIPHER_CTX *, int);
 static int (*dlsym_EVP_CIPHER_CTX_test_flags)(const EVP_CIPHER_CTX *, int);
 static int (*dlsym_EVP_CIPHER_CTX_block_size)(const EVP_CIPHER_CTX *);
@@ -123,10 +127,16 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_initIDs
                       "EVP_CIPHER_CTX_new");
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_free, env, openssl,  \
                       "EVP_CIPHER_CTX_free");
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+  LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_reset, env, openssl,  \
+                      "EVP_CIPHER_CTX_reset");
+#else
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_cleanup, env, openssl,  \
                       "EVP_CIPHER_CTX_cleanup");
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_init, env, openssl,  \
                       "EVP_CIPHER_CTX_init");
+#endif
+
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_set_padding, env, openssl,  \
                       "EVP_CIPHER_CTX_set_padding");
   LOAD_DYNAMIC_SYMBOL(dlsym_EVP_CIPHER_CTX_test_flags, env, openssl,  \
@@ -271,7 +281,11 @@ JNIEXPORT jlong JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_init
   (*env)->ReleaseByteArrayElements(env, key, jKey, 0);
   (*env)->ReleaseByteArrayElements(env, iv, jIv, 0);
   if (rc == 0) {
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+    dlsym_EVP_CIPHER_CTX_reset(context);
+#else
     dlsym_EVP_CIPHER_CTX_cleanup(context);
+#endif
     THROW(env, "java/lang/InternalError", "Error in EVP_CipherInit_ex.");
     return (jlong)0;
   }
@@ -334,7 +348,11 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_update
   int output_len = 0;
   if (!dlsym_EVP_CipherUpdate(context, output_bytes, &output_len,  \
       input_bytes, input_len)) {
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+    dlsym_EVP_CIPHER_CTX_reset(context);
+#else
     dlsym_EVP_CIPHER_CTX_cleanup(context);
+#endif
     THROW(env, "java/lang/InternalError", "Error in EVP_CipherUpdate.");
     return 0;
   }
@@ -376,7 +394,11 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_doFinal
   
   int output_len = 0;
   if (!dlsym_EVP_CipherFinal_ex(context, output_bytes, &output_len)) {
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+    dlsym_EVP_CIPHER_CTX_reset(context);
+#else
     dlsym_EVP_CIPHER_CTX_cleanup(context);
+#endif
     THROW(env, "java/lang/InternalError", "Error in EVP_CipherFinal_ex.");
     return 0;
   }
@@ -396,6 +418,16 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_getLibrary
     (JNIEnv *env, jclass clazz) 
 {
 #ifdef UNIX
+#if OPENSSL_API_COMPAT < 0x10100000L && OPENSSL_VERSION_NUMBER >= 0x10100000L
+  if (dlsym_EVP_CIPHER_CTX_reset) {
+    Dl_info dl_info;
+    if(dladdr(
+        dlsym_EVP_CIPHER_CTX_reset,
+        &dl_info)) {
+      return (*env)->NewStringUTF(env, dl_info.dli_fname);
+    }
+  }
+#else
   if (dlsym_EVP_CIPHER_CTX_init) {
     Dl_info dl_info;
     if(dladdr(
@@ -404,6 +436,7 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_getLibrary
       return (*env)->NewStringUTF(env, dl_info.dli_fname);
     }
   }
+#endif
 
   return (*env)->NewStringUTF(env, HADOOP_OPENSSL_LIBRARY);
 #endif

+ 31 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.conf;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -2419,4 +2420,34 @@ public class TestConfiguration {
       System.setOut(output);
     }
   }
+
+  /**
+   * Test race conditions between clone() and getProps().
+   * Test for race conditions in the way Hadoop handles the Configuration
+   * class. The scenario is the following. Let's assume that there are two
+   * threads sharing the same Configuration class. One adds some resources
+   * to the configuration, while the other one clones it. Resources are
+   * loaded lazily in a deferred call to loadResources(). If the cloning
+   * happens after adding the resources but before parsing them, some temporary
+   * resources like input stream pointers are cloned. Eventually both copies
+   * will load the same input stream resources.
+   * One parses the input stream XML and closes it updating it's own copy of
+   * the resource. The other one has another pointer to the same input stream.
+   * When it tries to load it, it will crash with a stream closed exception.
+   */
+  @Test
+  public void testResourceRace() {
+    InputStream is =
+        new BufferedInputStream(new ByteArrayInputStream(
+            "<configuration></configuration>".getBytes()));
+    Configuration config = new Configuration();
+    // Thread 1
+    config.addResource(is);
+    // Thread 2
+    Configuration confClone = new Configuration(conf);
+    // Thread 2
+    confClone.get("firstParse");
+    // Thread 1
+    config.get("secondParse");
+  }
 }

+ 2 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java

@@ -43,7 +43,7 @@ public final class FileContextTestHelper {
    * Create a context with test root relative to the test directory
    */
   public FileContextTestHelper() {
-    this(GenericTestUtils.getRandomizedTestDir().getAbsolutePath());
+    this(GenericTestUtils.getRandomizedTestDir().getPath());
   }
 
   /**
@@ -83,7 +83,7 @@ public final class FileContextTestHelper {
         absTestRootDir = testRootDir;
       } else {
         absTestRootDir = fc.getWorkingDirectory().toString() + "/"
-            + testRootDir;
+            + new Path(testRootDir).toUri();
       }
     }
     return absTestRootDir;

+ 31 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java

@@ -228,9 +228,9 @@ public class ContractTestUtils extends Assert {
   public static void verifyFileContents(FileSystem fs,
                                         Path path,
                                         byte[] original) throws IOException {
+    assertIsFile(fs, path);
     FileStatus stat = fs.getFileStatus(path);
     String statText = stat.toString();
-    assertTrue("not a file " + statText, stat.isFile());
     assertEquals("wrong length " + statText, original.length, stat.getLen());
     byte[] bytes = readDataset(fs, path, original.length);
     compareByteArrays(original, bytes, original.length);
@@ -853,6 +853,36 @@ public class ContractTestUtils extends Assert {
                        status.isSymlink());
   }
 
+  /**
+   * Assert that a varargs list of paths exist.
+   * @param fs filesystem
+   * @param message message for exceptions
+   * @param paths paths
+   * @throws IOException IO failure
+   */
+  public static void assertPathsExist(FileSystem fs,
+      String message,
+      Path... paths) throws IOException {
+    for (Path path : paths) {
+      assertPathExists(fs, message, path);
+    }
+  }
+
+  /**
+   * Assert that a varargs list of paths do not exist.
+   * @param fs filesystem
+   * @param message message for exceptions
+   * @param paths paths
+   * @throws IOException IO failure
+   */
+  public static void assertPathsDoNotExist(FileSystem fs,
+      String message,
+      Path... paths) throws IOException {
+    for (Path path : paths) {
+      assertPathDoesNotExist(fs, message, path);
+    }
+  }
+
   /**
    * Create a dataset for use in the tests; all data is in the range
    * base to (base+modulo-1) inclusive.

+ 5 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java

@@ -44,6 +44,7 @@ import org.apache.commons.lang.RandomStringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.log4j.Appender;
@@ -248,7 +249,7 @@ public abstract class GenericTestUtils {
    * @return the absolute directory for tests. Caller is expected to create it.
    */
   public static File getRandomizedTestDir() {
-    return new File(getRandomizedTempPath()).getAbsoluteFile();
+    return new File(getRandomizedTempPath());
   }
 
   /**
@@ -259,7 +260,9 @@ public abstract class GenericTestUtils {
    * @return a string to use in paths
    */
   public static String getTempPath(String subpath) {
-    String prop = System.getProperty(SYSPROP_TEST_DATA_DIR, DEFAULT_TEST_DATA_PATH);
+    String prop = (Path.WINDOWS) ? DEFAULT_TEST_DATA_PATH
+        : System.getProperty(SYSPROP_TEST_DATA_DIR, DEFAULT_TEST_DATA_PATH);
+
     if (prop.isEmpty()) {
       // corner case: property is there but empty
       prop = DEFAULT_TEST_DATA_PATH;

+ 6 - 0
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java

@@ -28,6 +28,7 @@ import javax.servlet.ServletContextListener;
 import com.codahale.metrics.JmxReporter;
 import com.codahale.metrics.Meter;
 import com.codahale.metrics.MetricRegistry;
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.key.CachingKeyProvider;
@@ -159,6 +160,11 @@ public class KMSWebApp implements ServletContextListener {
       }
       KeyProvider keyProvider =
           KeyProviderFactory.get(new URI(providerString), kmsConf);
+      Preconditions.checkNotNull(keyProvider, String.format("No" +
+              " KeyProvider has been initialized, please" +
+              " check whether %s '%s' is configured correctly in" +
+              " kms-site.xml.", KMSConfiguration.KEY_PROVIDER_URI,
+          providerString));
       if (kmsConf.getBoolean(KMSConfiguration.KEY_CACHE_ENABLE,
           KMSConfiguration.KEY_CACHE_ENABLE_DEFAULT)) {
         long keyTimeOutMillis =

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -2910,7 +2910,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
    * @param num Number of threads for hedged reads thread pool.
    * If zero, skip hedged reads thread pool creation.
    */
-  private synchronized void initThreadsNumForHedgedReads(int num) {
+  private static synchronized void initThreadsNumForHedgedReads(int num) {
     if (num <= 0 || HEDGED_READ_THREAD_POOL != null) return;
     HEDGED_READ_THREAD_POOL = new ThreadPoolExecutor(1, num, 60,
         TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
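
HEDGED_READ_THREAD_POOL is a static field, so an instance-level synchronized method only serializes callers on the same DFSClient; two clients initializing concurrently can each observe null and build their own pool. Making the method static moves the lock to the class object. The sketch below is a generic illustration of that distinction, not the DFSClient code (class and method names are made up):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

class PoolHolder {
  private static ExecutorService pool;

  // Instance-level lock: two PoolHolder objects use different monitors, so
  // both can pass the null check and each create a pool (the racy variant).
  synchronized void initWithInstanceLock(int threads) {
    if (pool == null) {
      pool = Executors.newFixedThreadPool(threads);
    }
  }

  // Class-level lock: every instance contends on PoolHolder.class, so the
  // pool is created at most once (the pattern this change adopts).
  static synchronized void initWithClassLock(int threads) {
    if (pool == null) {
      pool = Executors.newFixedThreadPool(threads);
    }
  }
}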

+ 98 - 18
hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml

@@ -31,6 +31,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
 
   <properties>
     <require.fuse>false</require.fuse>
+    <require.libwebhdfs>false</require.libwebhdfs>
+    <require.valgrind>false</require.valgrind>
+    <native_ctest_args></native_ctest_args>
+    <native_cmake_args></native_cmake_args>
+    <native_make_args></native_make_args>
     <hadoop.component>hdfs</hadoop.component>
   </properties>
 
@@ -85,6 +90,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
             <exclude>src/main/native/config/*</exclude>
             <exclude>src/main/native/m4/*</exclude>
             <exclude>src/main/native/fuse-dfs/util/tree.h</exclude>
+            <exclude>src/main/native/libhdfspp/third_party/**</exclude>
             <exclude>src/contrib/**</exclude>
           </excludes>
         </configuration>
@@ -138,17 +144,16 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                 </goals>
                 <configuration>
                   <target>
-                    <condition property="generator" value="Visual Studio 10" else="Visual Studio 10 Win64">
-                      <equals arg1="Win32" arg2="${env.PLATFORM}" />
-                    </condition>
                     <mkdir dir="${project.build.directory}/native"/>
                     <exec executable="cmake" dir="${project.build.directory}/native"
                           failonerror="true">
-                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_FUSE=${require.fuse} -G '${generator}'"/>
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DHADOOP_BUILD=1 -DREQUIRE_FUSE=${require.fuse} -DREQUIRE_VALGRIND=${require.valgrind} -A '${env.PLATFORM}'"/>
+                      <arg line="${native_cmake_args}"/>
                     </exec>
                     <exec executable="msbuild" dir="${project.build.directory}/native"
                           failonerror="true">
                       <arg line="ALL_BUILD.vcxproj /nologo /p:Configuration=RelWithDebInfo /p:LinkIncremental=false"/>
+                      <arg line="${native_make_args}"/>
                     </exec>
                     <!-- Copy for inclusion in distribution. -->
                     <copy todir="${project.build.directory}/bin">
@@ -167,11 +172,15 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                     <property name="compile_classpath" refid="maven.compile.classpath"/>
                     <property name="test_classpath" refid="maven.test.classpath"/>
                     <exec executable="ctest" failonerror="true" dir="${project.build.directory}/native">
+                      <arg line="--output-on-failure"/>
+                      <arg line="${native_ctest_args}"/>
                       <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
                       <!-- HADOOP_HOME required to find winutils. -->
                       <env key="HADOOP_HOME" value="${hadoop.common.build.dir}"/>
                       <!-- Make sure hadoop.dll and jvm.dll are on PATH. -->
                       <env key="PATH" value="${env.PATH};${hadoop.common.build.dir}/bin;${java.home}/jre/bin/server;${java.home}/bin/server"/>
+                      <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
+                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
                     </exec>
                   </target>
                 </configuration>
@@ -192,31 +201,90 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
       <build>
         <plugins>
           <plugin>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-maven-plugins</artifactId>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-antrun-plugin</artifactId>
             <executions>
               <execution>
-                <id>cmake-compile</id>
+                <id>make</id>
                 <phase>compile</phase>
-                <goals><goal>cmake-compile</goal></goals>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <mkdir dir="${project.build.directory}"/>
+                    <exec executable="cmake" dir="${project.build.directory}" failonerror="true">
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model}  -DHADOOP_BUILD=1 -DREQUIRE_LIBWEBHDFS=${require.libwebhdfs} -DREQUIRE_FUSE=${require.fuse} -DREQUIRE_VALGRIND=${require.valgrind} "/>
+                      <arg line="${native_cmake_args}"/>
+                    </exec>
+                    <exec executable="make" dir="${project.build.directory}" failonerror="true">
+                      <arg line="${native_make_args}"/>
+                    </exec>
+                  </target>
+                </configuration>
+              </execution>
+              <execution>
+                <id>native_tests</id>
+                <phase>test</phase>
+                <goals><goal>run</goal></goals>
                 <configuration>
-                  <source>${basedir}/src</source>
-                  <vars>
-                    <GENERATED_JAVAH>${project.build.directory}/native/javah</GENERATED_JAVAH>
-                    <JVM_ARCH_DATA_MODEL>${sun.arch.data.model}</JVM_ARCH_DATA_MODEL>
-                    <REQUIRE_FUSE>${require.fuse}</REQUIRE_FUSE>
-                  </vars>
-                  <output>${project.build.directory}</output>
+                  <skip>${skipTests}</skip>
+                  <target>
+                    <property name="compile_classpath" refid="maven.compile.classpath"/>
+                    <property name="test_classpath" refid="maven.test.classpath"/>
+                    <exec executable="ctest" failonerror="true" dir="${project.build.directory}/">
+                      <arg line="--output-on-failure"/>
+                      <arg line="${native_ctest_args}"/>
+                      <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
+                      <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
+                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
+                    </exec>
+                  </target>
                 </configuration>
               </execution>
             </executions>
           </plugin>
+        </plugins>
+      </build>
+    </profile>
+    <profile>
+      <id>test-patch</id>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+      <properties>
+        <runningWithNative>true</runningWithNative>
+      </properties>
+      <build>
+        <plugins>
           <plugin>
             <groupId>org.apache.maven.plugins</groupId>
             <artifactId>maven-antrun-plugin</artifactId>
             <executions>
               <execution>
-                <id>native_tests</id>
+                <id>make_altern</id>
+                <phase>compile</phase>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <mkdir dir="${project.build.directory}/altern"/>
+                    <condition property="c_compiler" value="clang" else="gcc">
+                      <contains string="${env.CC}" substring="gcc"/>
+                    </condition>
+                    <condition property="cxx_compiler" value="clang++" else="g++">
+                      <contains string="${env.CXX}" substring="g++"/>
+                    </condition>
+                    <exec executable="cmake" dir="${project.build.directory}/altern" failonerror="true">
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/altern/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model}  -DHADOOP_BUILD=1 -DREQUIRE_LIBWEBHDFS=${require.libwebhdfs} -DREQUIRE_FUSE=${require.fuse} -DREQUIRE_VALGRIND=${require.valgrind} "/>
+                      <arg line="-DCMAKE_C_COMPILER=${c_compiler} -DCMAKE_CXX_COMPILER=${cxx_compiler}"/>
+                      <arg line="${native_cmake_args}"/>
+                    </exec>
+                    <exec executable="make" dir="${project.build.directory}/altern" failonerror="true">
+                      <arg line="${native_make_args}"/>
+                    </exec>
+                  </target>
+                </configuration>
+              </execution>
+              <execution>
+                <id>native_tests_altern</id>
                 <phase>test</phase>
                 <goals><goal>run</goal></goals>
                 <configuration>
@@ -224,14 +292,26 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                   <target>
                     <property name="compile_classpath" refid="maven.compile.classpath"/>
                     <property name="test_classpath" refid="maven.test.classpath"/>
-                    <exec executable="ctest" failonerror="true" dir="${project.build.directory}/">
+                    <exec executable="ctest" failonerror="true" dir="${project.build.directory}/altern">
+                      <arg line="--output-on-failure"/>
+                      <arg line="${native_ctest_args}"/>
                       <env key="CLASSPATH" value="${test_classpath}:${compile_classpath}"/>
                       <!-- Make sure libhadoop.so is on LD_LIBRARY_PATH. -->
-                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
+                      <env key="LD_LIBRARY_PATH" value="${env.LD_LIBRARY_PATH}:${project.build.directory}/altern/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib"/>
                     </exec>
                   </target>
                 </configuration>
               </execution>
+              <execution>
+                <id>clean_altern</id>
+                <phase>test</phase>
+                <goals><goal>run</goal></goals>
+                <configuration>
+                  <target>
+                    <delete dir="${project.build.directory}/altern" includeemptydirs="true"/>
+                  </target>
+                </configuration>
+              </execution>
             </executions>
           </plugin>
         </plugins>

+ 7 - 10
hadoop-hdfs-project/hadoop-hdfs-native-client/src/CMakeLists.txt

@@ -58,19 +58,11 @@ if(WIN32)
     # Omit unneeded headers.
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DWIN32_LEAN_AND_MEAN")
     set(OS_DIR ${CMAKE_SOURCE_DIR}/main/native/libhdfs/os/windows)
-
-    # IMPORTANT: OUT_DIR MUST be relative to maven's
-    # project.build.directory (=target) and match dist-copynativelibs
-    # in order to be in a release
-    set(OUT_DIR bin)
+    set(OUT_DIR target/bin)
 else()
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
     set(OS_DIR ${CMAKE_SOURCE_DIR}/main/native/libhdfs/os/posix)
-
-    # IMPORTANT: OUT_DIR MUST be relative to maven's
-    # project.build.directory (=target) and match dist-copynativelibs
-    # in order to be in a release
-    set(OUT_DIR native/target/usr/local/lib)
+    set(OUT_DIR target/usr/local/lib)
 endif()
 
 # Configure JNI.
@@ -98,6 +90,11 @@ endfunction()
 
 add_subdirectory(main/native/libhdfs)
 add_subdirectory(main/native/libhdfs-tests)
+add_subdirectory(main/native/libhdfspp)
+
+if(REQUIRE_LIBWEBHDFS)
+    add_subdirectory(contrib/libwebhdfs)
+endif()
 
 # Find Linux FUSE
 if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")

+ 16 - 6
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.c

@@ -182,6 +182,16 @@ struct NativeMiniDfsCluster* nmdCreate(struct NativeMiniDfsConf *conf)
         }
         (*env)->DeleteLocalRef(env, val.l);
     }
+    if (conf->numDataNodes) {
+        jthr = invokeMethod(env, &val, INSTANCE, bld, MINIDFS_CLUSTER_BUILDER,
+                "numDataNodes", "(I)L" MINIDFS_CLUSTER_BUILDER ";", conf->numDataNodes);
+        if (jthr) {
+            printExceptionAndFree(env, jthr, PRINT_EXC_ALL, "nmdCreate: "
+                                  "Builder::numDataNodes");
+            goto error;
+        }
+    }
+    (*env)->DeleteLocalRef(env, val.l);
     jthr = invokeMethod(env, &val, INSTANCE, bld, MINIDFS_CLUSTER_BUILDER,
             "build", "()L" MINIDFS_CLUSTER ";");
     if (jthr) {
@@ -291,7 +301,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
     jthrowable jthr;
     int ret = 0;
     const char *host;
-    
+
     if (!env) {
         fprintf(stderr, "nmdHdfsConnect: getJNIEnv failed\n");
         return -EIO;
@@ -306,7 +316,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
         return -EIO;
     }
     jNameNode = jVal.l;
-    
+
     // Then get the http address (InetSocketAddress) of the NameNode
     jthr = invokeMethod(env, &jVal, INSTANCE, jNameNode, HADOOP_NAMENODE,
                         "getHttpAddress", "()L" JAVA_INETSOCKETADDRESS ";");
@@ -317,7 +327,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
         goto error_dlr_nn;
     }
     jAddress = jVal.l;
-    
+
     jthr = invokeMethod(env, &jVal, INSTANCE, jAddress,
                         JAVA_INETSOCKETADDRESS, "getPort", "()I");
     if (jthr) {
@@ -327,7 +337,7 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
         goto error_dlr_addr;
     }
     *port = jVal.i;
-    
+
     jthr = invokeMethod(env, &jVal, INSTANCE, jAddress, JAVA_INETSOCKETADDRESS,
                         "getHostName", "()Ljava/lang/String;");
     if (jthr) {
@@ -339,12 +349,12 @@ int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
     host = (*env)->GetStringUTFChars(env, jVal.l, NULL);
     *hostName = strdup(host);
     (*env)->ReleaseStringUTFChars(env, jVal.l, host);
-    
+
 error_dlr_addr:
     (*env)->DeleteLocalRef(env, jAddress);
 error_dlr_nn:
     (*env)->DeleteLocalRef(env, jNameNode);
-    
+
     return ret;
 }
 

+ 8 - 3
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/native_mini_dfs.h

@@ -26,7 +26,7 @@ extern  "C" {
 #endif
 
 struct hdfsBuilder;
-struct NativeMiniDfsCluster; 
+struct NativeMiniDfsCluster;
 
 /**
  * Represents a configuration to use for creating a Native MiniDFSCluster
@@ -51,6 +51,11 @@ struct NativeMiniDfsConf {
      * Nonzero if we should configure short circuit.
      */
     jboolean configureShortCircuit;
+
+    /**
+     * The number of datanodes in MiniDfsCluster
+     */
+    jint numDataNodes;
 };
 
 /**
@@ -96,13 +101,13 @@ void nmdFree(struct NativeMiniDfsCluster* cl);
  *
  * @return          the port, or a negative error code
  */
-int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl); 
+int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl);
 
 /**
  * Get the http address that's in use by the given (non-HA) nativeMiniDfs
  *
  * @param cl        The initialized NativeMiniDfsCluster
- * @param port      Used to capture the http port of the NameNode 
+ * @param port      Used to capture the http port of the NameNode
  *                  of the NativeMiniDfsCluster
  * @param hostName  Used to capture the http hostname of the NameNode
  *                  of the NativeMiniDfsCluster

+ 350 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_mini_stress.c

@@ -0,0 +1,350 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "expect.h"
+#include "hdfs/hdfs.h"
+#include "hdfspp/hdfs_ext.h"
+#include "native_mini_dfs.h"
+#include "os/thread.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define TO_STR_HELPER(X) #X
+#define TO_STR(X) TO_STR_HELPER(X)
+
+#define TLH_MAX_THREADS 10000
+
+#define TLH_MAX_DNS 16
+
+#define TLH_DEFAULT_BLOCK_SIZE 1048576
+
+#define TLH_DEFAULT_DFS_REPLICATION 3
+
+#define TLH_DEFAULT_IPC_CLIENT_CONNECT_MAX_RETRIES 100
+
+#define TLH_DEFAULT_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS 5
+
+#ifndef RANDOM_ERROR_RATIO
+#define RANDOM_ERROR_RATIO 1000000000
+#endif
+
+struct tlhThreadInfo {
+  /** Thread index */
+  int threadIdx;
+  /** 0 = thread was successful; error code otherwise */
+  int success;
+  /** thread identifier */
+  thread theThread;
+  /** fs, shared with other threads **/
+  hdfsFS hdfs;
+  /** Filename */
+  const char *fileNm;
+
+};
+
+static int hdfsNameNodeConnect(struct NativeMiniDfsCluster *cl, hdfsFS *fs,
+                               const char *username)
+{
+  int ret;
+  tPort port;
+  hdfsFS hdfs;
+  struct hdfsBuilder *bld;
+
+  port = (tPort)nmdGetNameNodePort(cl);
+  if (port < 0) {
+    fprintf(stderr, "hdfsNameNodeConnect: nmdGetNameNodePort "
+            "returned error %d\n", port);
+    return port;
+  }
+  bld = hdfsNewBuilder();
+  if (!bld)
+    return -ENOMEM;
+  hdfsBuilderSetForceNewInstance(bld);
+  hdfsBuilderSetNameNode(bld, "localhost");
+  hdfsBuilderSetNameNodePort(bld, port);
+  hdfsBuilderConfSetStr(bld, "dfs.block.size",
+                        TO_STR(TLH_DEFAULT_BLOCK_SIZE));
+  hdfsBuilderConfSetStr(bld, "dfs.blocksize",
+                        TO_STR(TLH_DEFAULT_BLOCK_SIZE));
+  hdfsBuilderConfSetStr(bld, "dfs.replication",
+                        TO_STR(TLH_DEFAULT_DFS_REPLICATION));
+  hdfsBuilderConfSetStr(bld, "ipc.client.connect.max.retries",
+                        TO_STR(TLH_DEFAULT_IPC_CLIENT_CONNECT_MAX_RETRIES));
+  hdfsBuilderConfSetStr(bld, "ipc.client.connect.retry.interval",
+                        TO_STR(TLH_DEFAULT_IPC_CLIENT_CONNECT_RETRY_INTERVAL_MS));
+  if (username) {
+    hdfsBuilderSetUserName(bld, username);
+  }
+  hdfs = hdfsBuilderConnect(bld);
+  if (!hdfs) {
+    ret = -errno;
+    return ret;
+  }
+  *fs = hdfs;
+  return 0;
+}
+
+static int hdfsWriteData(hdfsFS hdfs, const char *dirNm,
+                         const char *fileNm, tSize fileSz)
+{
+  hdfsFile file;
+  int ret, expected;
+  const char *content;
+
+  content = fileNm;
+
+  if (hdfsExists(hdfs, dirNm) == 0) {
+    EXPECT_ZERO(hdfsDelete(hdfs, dirNm, 1));
+  }
+  EXPECT_ZERO(hdfsCreateDirectory(hdfs, dirNm));
+
+  file = hdfsOpenFile(hdfs, fileNm, O_WRONLY, 0, 0, 0);
+  EXPECT_NONNULL(file);
+
+  expected = (int)strlen(content);
+  tSize sz = 0;
+  while (sz < fileSz) {
+    ret = hdfsWrite(hdfs, file, content, expected);
+    if (ret < 0) {
+      ret = errno;
+      fprintf(stderr, "hdfsWrite failed and set errno %d\n", ret);
+      return ret;
+    }
+    if (ret != expected) {
+      fprintf(stderr, "hdfsWrite was supposed to write %d bytes, but "
+              "it wrote %d\n", ret, expected);
+      return EIO;
+    }
+    sz += ret;
+  }
+  EXPECT_ZERO(hdfsFlush(hdfs, file));
+  EXPECT_ZERO(hdfsHSync(hdfs, file));
+  EXPECT_ZERO(hdfsCloseFile(hdfs, file));
+  return 0;
+}
+
+static int fileEventCallback1(const char * event, const char * cluster, const char * file, int64_t value, int64_t cookie)
+{
+  char * randomErrRatioStr = getenv("RANDOM_ERROR_RATIO");
+  int64_t randomErrRatio = RANDOM_ERROR_RATIO;
+  if (randomErrRatioStr) randomErrRatio = (int64_t)atoi(randomErrRatioStr);
+  if (randomErrRatio == 0) return DEBUG_SIMULATE_ERROR;
+  else if (randomErrRatio < 0) return LIBHDFSPP_EVENT_OK;
+  return random() % randomErrRatio == 0 ? DEBUG_SIMULATE_ERROR : LIBHDFSPP_EVENT_OK;
+}
+
+static int fileEventCallback2(const char * event, const char * cluster, const char * file, int64_t value, int64_t cookie)
+{
+  /* no op */
+  return LIBHDFSPP_EVENT_OK;
+}
+
+static int doTestHdfsMiniStress(struct tlhThreadInfo *ti, int randomErr)
+{
+  char tmp[4096];
+  hdfsFile file;
+  int ret, expected;
+  hdfsFileInfo *fileInfo;
+  uint64_t readOps, nErrs=0;
+  tOffset seekPos;
+  const char *content;
+
+  content = ti->fileNm;
+  expected = (int)strlen(content);
+
+  fileInfo = hdfsGetPathInfo(ti->hdfs, ti->fileNm);
+  EXPECT_NONNULL(fileInfo);
+
+  file = hdfsOpenFile(ti->hdfs, ti->fileNm, O_RDONLY, 0, 0, 0);
+  EXPECT_NONNULL(file);
+
+  libhdfspp_file_event_callback callback = (randomErr != 0) ? &fileEventCallback1 : &fileEventCallback2;
+
+  hdfsPreAttachFileMonitor(callback, 0);
+
+  fprintf(stderr, "testHdfsMiniStress(threadIdx=%d): starting read loop\n",
+          ti->threadIdx);
+  for (readOps=0; readOps < 1000; ++readOps) {
+    EXPECT_ZERO(hdfsCloseFile(ti->hdfs, file));
+    file = hdfsOpenFile(ti->hdfs, ti->fileNm, O_RDONLY, 0, 0, 0);
+    EXPECT_NONNULL(file);
+    seekPos = (((double)random()) / RAND_MAX) * (fileInfo->mSize - expected);
+    seekPos = (seekPos / expected) * expected;
+    ret = hdfsSeek(ti->hdfs, file, seekPos);
+    if (ret < 0) {
+      ret = errno;
+      fprintf(stderr, "hdfsSeek to %"PRIu64" failed and set"
+              " errno %d\n", seekPos, ret);
+      ++nErrs;
+      continue;
+    }
+    ret = hdfsRead(ti->hdfs, file, tmp, expected);
+    if (ret < 0) {
+      ret = errno;
+      fprintf(stderr, "hdfsRead failed and set errno %d\n", ret);
+      ++nErrs;
+      continue;
+    }
+    if (ret != expected) {
+      fprintf(stderr, "hdfsRead was supposed to read %d bytes, but "
+              "it read %d\n", ret, expected);
+      ++nErrs;
+      continue;
+    }
+    ret = memcmp(content, tmp, expected);
+    if (ret) {
+      fprintf(stderr, "hdfsRead result (%.*s) does not match expected (%.*s)",
+              expected, tmp, expected, content);
+      ++nErrs;
+      continue;
+    }
+  }
+  EXPECT_ZERO(hdfsCloseFile(ti->hdfs, file));
+  fprintf(stderr, "testHdfsMiniStress(threadIdx=%d): finished read loop\n",
+          ti->threadIdx);
+  EXPECT_ZERO(nErrs);
+  return 0;
+}
+
+static int testHdfsMiniStressImpl(struct tlhThreadInfo *ti)
+{
+  fprintf(stderr, "testHdfsMiniStress(threadIdx=%d): starting\n",
+          ti->threadIdx);
+  EXPECT_NONNULL(ti->hdfs);
+  // Error injection on, some failures are expected in the read path.
+  // The expectation is that any memory stomps will cascade and cause
+  // the following test to fail.  Ideally RPC errors would be separated
+  // from BlockReader errors (RPC is expected to recover from disconnects).
+  doTestHdfsMiniStress(ti, 1);
+  // No error injection
+  EXPECT_ZERO(doTestHdfsMiniStress(ti, 0));
+  return 0;
+}
+
+static void testHdfsMiniStress(void *v)
+{
+  struct tlhThreadInfo *ti = (struct tlhThreadInfo*)v;
+  int ret = testHdfsMiniStressImpl(ti);
+  ti->success = ret;
+}
+
+static int checkFailures(struct tlhThreadInfo *ti, int tlhNumThreads)
+{
+  int i, threadsFailed = 0;
+  const char *sep = "";
+
+  for (i = 0; i < tlhNumThreads; i++) {
+    if (ti[i].success != 0) {
+      threadsFailed = 1;
+    }
+  }
+  if (!threadsFailed) {
+    fprintf(stderr, "testLibHdfsMiniStress: all threads succeeded.  SUCCESS.\n");
+    return EXIT_SUCCESS;
+  }
+  fprintf(stderr, "testLibHdfsMiniStress: some threads failed: [");
+  for (i = 0; i < tlhNumThreads; i++) {
+    if (ti[i].success != 0) {
+      fprintf(stderr, "%s%d", sep, i);
+      sep = ", ";
+    }
+  }
+  fprintf(stderr, "].  FAILURE.\n");
+  return EXIT_FAILURE;
+}
+
+/**
+ * Test intended to stress libhdfs client with concurrent requests. Currently focused
+ * on concurrent reads.
+ */
+int main(void)
+{
+  int i, tlhNumThreads;
+  char *dirNm, *fileNm;
+  tSize fileSz;
+  const char *tlhNumThreadsStr, *tlhNumDNsStr;
+  hdfsFS hdfs = NULL;
+  struct NativeMiniDfsCluster* tlhCluster;
+  struct tlhThreadInfo ti[TLH_MAX_THREADS];
+  struct NativeMiniDfsConf conf = {
+      1, /* doFormat */
+  };
+
+  dirNm = "/tlhMiniStressData";
+  fileNm = "/tlhMiniStressData/file";
+  fileSz = 2*1024*1024;
+
+  tlhNumDNsStr = getenv("TLH_NUM_DNS");
+  if (!tlhNumDNsStr) {
+    tlhNumDNsStr = "1";
+  }
+  conf.numDataNodes = atoi(tlhNumDNsStr);
+  if ((conf.numDataNodes <= 0) || (conf.numDataNodes > TLH_MAX_DNS)) {
+    fprintf(stderr, "testLibHdfsMiniStress: must have a number of datanodes "
+            "between 1 and %d inclusive, not %d\n",
+            TLH_MAX_DNS, conf.numDataNodes);
+    return EXIT_FAILURE;
+  }
+
+  tlhNumThreadsStr = getenv("TLH_NUM_THREADS");
+  if (!tlhNumThreadsStr) {
+    tlhNumThreadsStr = "8";
+  }
+  tlhNumThreads = atoi(tlhNumThreadsStr);
+  if ((tlhNumThreads <= 0) || (tlhNumThreads > TLH_MAX_THREADS)) {
+    fprintf(stderr, "testLibHdfsMiniStress: must have a number of threads "
+            "between 1 and %d inclusive, not %d\n",
+            TLH_MAX_THREADS, tlhNumThreads);
+    return EXIT_FAILURE;
+  }
+  memset(&ti[0], 0, sizeof(ti));
+  for (i = 0; i < tlhNumThreads; i++) {
+    ti[i].threadIdx = i;
+  }
+
+  tlhCluster = nmdCreate(&conf);
+  EXPECT_NONNULL(tlhCluster);
+  EXPECT_ZERO(nmdWaitClusterUp(tlhCluster));
+
+  EXPECT_ZERO(hdfsNameNodeConnect(tlhCluster, &hdfs, NULL));
+
+  // Single threaded writes for now.
+  EXPECT_ZERO(hdfsWriteData(hdfs, dirNm, fileNm, fileSz));
+
+  // Multi-threaded reads.
+  for (i = 0; i < tlhNumThreads; i++) {
+    ti[i].theThread.start = testHdfsMiniStress;
+    ti[i].theThread.arg = &ti[i];
+    ti[i].hdfs = hdfs;
+    ti[i].fileNm = fileNm;
+    EXPECT_ZERO(threadCreate(&ti[i].theThread));
+  }
+  for (i = 0; i < tlhNumThreads; i++) {
+    EXPECT_ZERO(threadJoin(&ti[i].theThread));
+  }
+
+  EXPECT_ZERO(hdfsDisconnect(hdfs));
+  EXPECT_ZERO(nmdShutdown(tlhCluster));
+  nmdFree(tlhCluster);
+  return checkFailures(ti, tlhNumThreads);
+}
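
Note: the error-injection path above hinges on libhdfs++'s file event callback exposed through `hdfspp/hdfs_ext.h`. As a minimal sketch (illustrative only; the cookie-based ratio below is an assumption, not part of this change), a callback of the same shape can be registered with `hdfsPreAttachFileMonitor()` before any files are opened:

``` c
/* Illustrative sketch, not part of this change: a file event callback that
 * simulates an error roughly once every `cookie` events.  The callback
 * signature and the hdfsPreAttachFileMonitor() registration mirror the
 * usage in test_libhdfs_mini_stress.c above. */
#include <stdint.h>
#include <stdlib.h>
#include "hdfspp/hdfs_ext.h"

static int injectOneInN(const char *event, const char *cluster,
                        const char *file, int64_t value, int64_t cookie)
{
  (void)event; (void)cluster; (void)file; (void)value;
  if (cookie <= 0) {
    return LIBHDFSPP_EVENT_OK;          /* injection disabled */
  }
  return (random() % cookie == 0) ? DEBUG_SIMULATE_ERROR
                                  : LIBHDFSPP_EVENT_OK;
}

/* Register before opening files, e.g.:
 *   hdfsPreAttachFileMonitor(injectOneInN, 1000);
 */
```

Returning `DEBUG_SIMULATE_ERROR` is how the stress test above provokes failures in the read path; `LIBHDFSPP_EVENT_OK` leaves the operation untouched.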

+ 59 - 11
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs-tests/test_libhdfs_threaded.c

@@ -30,6 +30,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 
 #define TO_STR_HELPER(X) #X
 #define TO_STR(X) TO_STR_HELPER(X)
@@ -56,7 +57,7 @@ static int hdfsSingleNameNodeConnect(struct NativeMiniDfsCluster *cl, hdfsFS *fs
     tPort port;
     hdfsFS hdfs;
     struct hdfsBuilder *bld;
-    
+
     port = (tPort)nmdGetNameNodePort(cl);
     if (port < 0) {
         fprintf(stderr, "hdfsSingleNameNodeConnect: nmdGetNameNodePort "
@@ -92,13 +93,12 @@ static int doTestGetDefaultBlockSize(hdfsFS fs, const char *path)
 
     blockSize = hdfsGetDefaultBlockSize(fs);
     if (blockSize < 0) {
-        ret = errno;
-        fprintf(stderr, "hdfsGetDefaultBlockSize failed with error %d\n", ret);
-        return ret;
+        fprintf(stderr, "hdfsGetDefaultBlockSize failed with error %d\n", errno);
+        return -1;
     } else if (blockSize != TLH_DEFAULT_BLOCK_SIZE) {
         fprintf(stderr, "hdfsGetDefaultBlockSize got %"PRId64", but we "
                 "expected %d\n", blockSize, TLH_DEFAULT_BLOCK_SIZE);
-        return EIO;
+        return -1;
     }
 
     blockSize = hdfsGetDefaultBlockSizeAtPath(fs, path);
@@ -109,7 +109,7 @@ static int doTestGetDefaultBlockSize(hdfsFS fs, const char *path)
         return ret;
     } else if (blockSize != TLH_DEFAULT_BLOCK_SIZE) {
         fprintf(stderr, "hdfsGetDefaultBlockSizeAtPath(%s) got "
-                "%"PRId64", but we expected %d\n", 
+                "%"PRId64", but we expected %d\n",
                 path, blockSize, TLH_DEFAULT_BLOCK_SIZE);
         return EIO;
     }
@@ -157,12 +157,19 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
 
     EXPECT_ZERO(doTestGetDefaultBlockSize(fs, paths->prefix));
 
+    /* There is no such directory.
+     * Check that errno is set to ENOENT
+     */
+    char invalid_path[] = "/some_invalid/path";
+    EXPECT_NULL_WITH_ERRNO(hdfsListDirectory(fs, invalid_path, &numEntries), ENOENT);
+
     /* There should be no entry in the directory. */
     errno = EACCES; // see if errno is set to 0 on success
     EXPECT_NULL_WITH_ERRNO(hdfsListDirectory(fs, paths->prefix, &numEntries), 0);
     if (numEntries != 0) {
         fprintf(stderr, "hdfsListDirectory set numEntries to "
                 "%d on empty directory.", numEntries);
+        return EIO;
     }
 
     /* There should not be any file to open for reading. */
@@ -190,19 +197,45 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
     }
     if (ret != expected) {
         fprintf(stderr, "hdfsWrite was supposed to write %d bytes, but "
-                "it wrote %d\n", ret, expected);
+                "it wrote %d\n", expected, ret);
         return EIO;
     }
     EXPECT_ZERO(hdfsFlush(fs, file));
     EXPECT_ZERO(hdfsHSync(fs, file));
     EXPECT_ZERO(hdfsCloseFile(fs, file));
 
+    EXPECT_ZERO(doTestGetDefaultBlockSize(fs, paths->file1));
+
     /* There should be 1 entry in the directory. */
-    EXPECT_NONNULL(hdfsListDirectory(fs, paths->prefix, &numEntries));
+    hdfsFileInfo * dirList = hdfsListDirectory(fs, paths->prefix, &numEntries);
+    EXPECT_NONNULL(dirList);
     if (numEntries != 1) {
         fprintf(stderr, "hdfsListDirectory set numEntries to "
                 "%d on directory containing 1 file.", numEntries);
     }
+    hdfsFreeFileInfo(dirList, numEntries);
+
+    /* Create many files for ListDirectory to page through */
+    char listDirTest[PATH_MAX];
+    strcpy(listDirTest, paths->prefix);
+    strcat(listDirTest, "/for_list_test/");
+    EXPECT_ZERO(hdfsCreateDirectory(fs, listDirTest));
+    int nFile;
+    for (nFile = 0; nFile < 10000; nFile++) {
+      char filename[PATH_MAX];
+      snprintf(filename, PATH_MAX, "%s/many_files_%d", listDirTest, nFile);
+      file = hdfsOpenFile(fs, filename, O_WRONLY, 0, 0, 0);
+      EXPECT_NONNULL(file);
+      EXPECT_ZERO(hdfsCloseFile(fs, file));
+    }
+    dirList = hdfsListDirectory(fs, listDirTest, &numEntries);
+    EXPECT_NONNULL(dirList);
+    hdfsFreeFileInfo(dirList, numEntries);
+    if (numEntries != 10000) {
+        fprintf(stderr, "hdfsListDirectory set numEntries to "
+                "%d on directory containing 10000 files.", numEntries);
+        return EIO;
+    }
 
     /* Let's re-open the file for reading */
     file = hdfsOpenFile(fs, paths->file1, O_RDONLY, 0, 0, 0);
@@ -246,8 +279,8 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
     EXPECT_ZERO(memcmp(paths->prefix, tmp, expected));
     EXPECT_ZERO(hdfsCloseFile(fs, file));
 
-    // TODO: Non-recursive delete should fail?
-    //EXPECT_NONZERO(hdfsDelete(fs, prefix, 0));
+    //Non-recursive delete fails
+    EXPECT_NONZERO(hdfsDelete(fs, paths->prefix, 0));
     EXPECT_ZERO(hdfsCopy(fs, paths->file1, fs, paths->file2));
 
     EXPECT_ZERO(hdfsChown(fs, paths->file2, NULL, NULL));
@@ -274,6 +307,17 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs,
 
     snprintf(tmp, sizeof(tmp), "%s/nonexistent-file-name", paths->prefix);
     EXPECT_NEGATIVE_ONE_WITH_ERRNO(hdfsChown(fs, tmp, "ha3", NULL), ENOENT);
+
+    //Test case: File does not exist
+    EXPECT_NULL_WITH_ERRNO(hdfsGetPathInfo(fs, invalid_path), ENOENT);
+
+    //Test case: No permission to access parent directory
+    EXPECT_ZERO(hdfsChmod(fs, paths->prefix, 0));
+    //reconnect as user "SomeGuy" and verify that we get permission errors
+    hdfsFS fs2 = NULL;
+    EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs2, "SomeGuy"));
+    EXPECT_NULL_WITH_ERRNO(hdfsGetPathInfo(fs2, paths->file2), EACCES);
+    EXPECT_ZERO(hdfsDisconnect(fs2));
     return 0;
 }
 
@@ -285,6 +329,8 @@ static int testHdfsOperationsImpl(struct tlhThreadInfo *ti)
     fprintf(stderr, "testHdfsOperations(threadIdx=%d): starting\n",
         ti->threadIdx);
     EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs, NULL));
+    if (!fs)
+        return 1;
     EXPECT_ZERO(setupPaths(ti, &paths));
     // test some operations
     EXPECT_ZERO(doTestHdfsOperations(ti, fs, &paths));
@@ -295,6 +341,8 @@ static int testHdfsOperationsImpl(struct tlhThreadInfo *ti)
     EXPECT_ZERO(hdfsDisconnect(fs));
     // reconnect to do the final delete.
     EXPECT_ZERO(hdfsSingleNameNodeConnect(tlhCluster, &fs, NULL));
+    if (!fs)
+        return 1;
     EXPECT_ZERO(hdfsDelete(fs, paths.prefix, 1));
     EXPECT_ZERO(hdfsDisconnect(fs));
     return 0;
@@ -325,7 +373,7 @@ static int checkFailures(struct tlhThreadInfo *ti, int tlhNumThreads)
     for (i = 0; i < tlhNumThreads; i++) {
         if (ti[i].success != 0) {
             fprintf(stderr, "%s%d", sep, i);
-            sep = ", "; 
+            sep = ", ";
         }
     }
     fprintf(stderr, "].  FAILURE.\n");

+ 49 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindCyrusSASL.cmake

@@ -0,0 +1,49 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# - Find Cyrus SASL (sasl.h, libsasl2.so)
+#
+# This module defines
+#  CYRUS_SASL_INCLUDE_DIR, directory containing headers
+#  CYRUS_SASL_SHARED_LIB, path to Cyrus SASL's shared library
+#  CYRUS_SASL_FOUND, whether Cyrus SASL and its plugins have been found
+#
+# N.B: we do _not_ include sasl in thirdparty, for a fairly subtle reason. The
+# TLDR version is that newer versions of cyrus-sasl (>=2.1.26) have a bug fix
+# for https://bugzilla.cyrusimap.org/show_bug.cgi?id=3590, but that bug fix
+# relied on a change both on the plugin side and on the library side. If you
+# then try to run the new version of sasl (e.g from our thirdparty tree) with
+# an older version of a plugin (eg from RHEL6 install), you'll get a SASL_NOMECH
+# error due to this bug.
+#
+# In practice, Cyrus-SASL is so commonly used and generally non-ABI-breaking that
+# we should be OK to depend on the host installation.
+
+# Note that this is modified from the version that was copied from our
+# friends at the Kudu project.  The original version implicitly required
+# Cyrus SASL.  This version will only complain if REQUIRED is added.
+
+
+find_path(CYRUS_SASL_INCLUDE_DIR sasl/sasl.h)
+find_library(CYRUS_SASL_SHARED_LIB sasl2)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CYRUS_SASL DEFAULT_MSG
+  CYRUS_SASL_SHARED_LIB CYRUS_SASL_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(CYRUS_SASL_INCLUDE_DIR CYRUS_SASL_SHARED_LIB)

+ 44 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMake/FindGSasl.cmake

@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# - Try to find the GNU sasl library (gsasl)
+#
+# Once done this will define
+#
+#  GSASL_FOUND - System has gnutls
+#  GSASL_INCLUDE_DIR - The gnutls include directory
+#  GSASL_LIBRARIES - The libraries needed to use gnutls
+#  GSASL_DEFINITIONS - Compiler switches required for using gnutls
+
+
+IF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES)
+  # in cache already
+  SET(GSasl_FIND_QUIETLY TRUE)
+ENDIF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES)
+
+FIND_PATH(GSASL_INCLUDE_DIR gsasl.h)
+
+FIND_LIBRARY(GSASL_LIBRARIES gsasl)
+
+INCLUDE(FindPackageHandleStandardArgs)
+
+# handle the QUIETLY and REQUIRED arguments and set GSASL_FOUND to TRUE if
+# all listed variables are TRUE
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(GSASL DEFAULT_MSG GSASL_LIBRARIES GSASL_INCLUDE_DIR)
+
+MARK_AS_ADVANCED(GSASL_INCLUDE_DIR GSASL_LIBRARIES)

+ 297 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CMakeLists.txt

@@ -0,0 +1,297 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# If cmake variable HDFSPP_LIBRARY_ONLY is set, then tests, examples, and
+# tools will not be built. This allows for faster builds of the libhdfspp
+# library alone, avoids looking for a JDK, valgrind, and gmock, and
+# prevents the generation of multiple binaries that might not be relevant
+# to other projects during normal use.
+# Example of cmake invocation with HDFSPP_LIBRARY_ONLY enabled:
+# cmake -DHDFSPP_LIBRARY_ONLY=1
+
+project (libhdfspp)
+
+cmake_minimum_required(VERSION 2.8)
+
+enable_testing()
+include (CTest)
+
+SET(BUILD_SHARED_HDFSPP TRUE CACHE STRING "BUILD_SHARED_HDFSPP defaulting to 'TRUE'")
+SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
+
+# If there's a better way to inform FindCyrusSASL.cmake, let's make this cleaner:
+SET(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${CYRUS_SASL_DIR};${GSASL_DIR};$ENV{PROTOBUF_HOME}")
+
+# Specify PROTOBUF_HOME so that find_package picks up the correct version
+SET(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};$ENV{PROTOBUF_HOME}")
+
+find_package(Doxygen)
+find_package(OpenSSL REQUIRED)
+find_package(Protobuf REQUIRED)
+find_package(CyrusSASL)
+find_package(GSasl)
+find_package(Threads)
+
+include(CheckCXXSourceCompiles)
+
+# Check if thread_local is supported
+unset (THREAD_LOCAL_SUPPORTED CACHE)
+set (CMAKE_REQUIRED_DEFINITIONS "-std=c++11")
+set (CMAKE_REQUIRED_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+check_cxx_source_compiles(
+    "#include <thread>
+    int main(void) {
+      thread_local int s;
+      return 0;
+    }"
+    THREAD_LOCAL_SUPPORTED)
+if (NOT THREAD_LOCAL_SUPPORTED)
+  message(FATAL_ERROR
+  "FATAL ERROR: The required feature thread_local storage is not supported by your compiler. \
+  Known compilers that support this feature: GCC, Visual Studio, Clang (community version), \
+  Clang (version for iOS 9 and later).")
+endif (NOT THREAD_LOCAL_SUPPORTED)
+
+# Check if PROTOC library was compiled with the compatible compiler by trying
+# to compile some dummy code
+unset (PROTOC_IS_COMPATIBLE CACHE)
+set (CMAKE_REQUIRED_INCLUDES ${PROTOBUF_INCLUDE_DIRS})
+set (CMAKE_REQUIRED_LIBRARIES ${PROTOBUF_LIBRARY} ${PROTOBUF_PROTOC_LIBRARY})
+check_cxx_source_compiles(
+    "#include <google/protobuf/io/printer.h>
+    #include <string>
+    int main(void) {
+      ::google::protobuf::io::ZeroCopyOutputStream *out = NULL;
+      ::google::protobuf::io::Printer printer(out, '$');
+      printer.PrintRaw(std::string(\"test\"));
+      return 0;
+    }"
+    PROTOC_IS_COMPATIBLE)
+if (NOT PROTOC_IS_COMPATIBLE)
+  message(WARNING
+  "WARNING: the Protocol Buffers Library and the Libhdfs++ Library must both be compiled \
+  with the same (or compatible) compiler. Normally only the same major versions of the same \
+  compiler are compatible with each other.")
+endif (NOT PROTOC_IS_COMPATIBLE)
+
+find_program(MEMORYCHECK_COMMAND valgrind HINTS ${VALGRIND_DIR} )
+set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full --error-exitcode=1")
+message(STATUS "valgrind location: ${MEMORYCHECK_COMMAND}")
+
+if (REQUIRE_VALGRIND AND MEMORYCHECK_COMMAND MATCHES "MEMORYCHECK_COMMAND-NOTFOUND" )
+  message(FATAL_ERROR "valgrind was required but not found.  "
+                      "The path can be included via a -DVALGRIND_DIR=... flag passed to CMake.")
+endif (REQUIRE_VALGRIND AND MEMORYCHECK_COMMAND MATCHES "MEMORYCHECK_COMMAND-NOTFOUND" )
+
+# Find the SASL library to use.  If you don't want to require a sasl library,
+#    define -DNO_SASL=1 in your cmake call
+# Prefer Cyrus SASL, but use GSASL if it is found
+# Note that the packages can be disabled by setting CMAKE_DISABLE_FIND_PACKAGE_GSasl or
+#    CMAKE_DISABLE_FIND_PACKAGE_CyrusSASL, respectively (case sensitive)
+set (SASL_LIBRARIES)
+set (SASL_INCLUDE_DIR)
+if (NOT NO_SASL)
+    if (CYRUS_SASL_FOUND)
+        message(STATUS "Using Cyrus SASL; link with ${CYRUS_SASL_SHARED_LIB}")
+        set (SASL_INCLUDE_DIR ${CYRUS_SASL_INCLUDE_DIR})
+        set (SASL_LIBRARIES ${CYRUS_SASL_SHARED_LIB})
+        set (CMAKE_USING_CYRUS_SASL 1)
+        add_definitions(-DUSE_SASL -DUSE_CYRUS_SASL)
+    else (CYRUS_SASL_FOUND)
+        if (REQUIRE_CYRUS_SASL)
+          message(FATAL_ERROR "Cyrus SASL was required but not found.  "
+                                "The path can be included via a -DCYRUS_SASL_DIR=... flag passed to CMake.")
+        endif (REQUIRE_CYRUS_SASL)
+
+        # If we didn't pick Cyrus, use GSASL instead
+        if (GSASL_FOUND)
+          message(STATUS "Using GSASL; link with ${GSASL_LIBRARIES}")
+          set (SASL_INCLUDE_DIR ${GSASL_INCLUDE_DIR})
+          set (SASL_LIBRARIES ${GSASL_LIBRARIES})
+          set (CMAKE_USING_GSASL 1)
+          add_definitions(-DUSE_SASL -DUSE_GSASL)
+        else (GSASL_FOUND)
+          if (REQUIRE_GSASL)
+            message(FATAL_ERROR "GSASL was required but not found.  "
+                                "The path can be included via a -DGSASL_DIR=... flag passed to CMake.")
+          endif (REQUIRE_GSASL)
+
+          # No SASL was found, but NO_SASL was not defined
+          message(FATAL_ERROR "Cound not find a SASL library (GSASL (gsasl) or Cyrus SASL (libsasl2).  "
+                            "Install/configure one of them or define NO_SASL=1 in your cmake call")
+        endif (GSASL_FOUND)
+    endif (CYRUS_SASL_FOUND)
+else (NOT NO_SASL)
+    message(STATUS "Compiling with NO SASL SUPPORT")
+endif (NOT NO_SASL)
+
+add_definitions(-DASIO_STANDALONE -DASIO_CPP11_DATE_TIME)
+
+# Disable optimizations if compiling debug
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
+
+if(UNIX)
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic -std=c++11 -g -fPIC -fno-strict-aliasing")
+set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -fPIC -fno-strict-aliasing")
+endif()
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+    add_definitions(-DASIO_HAS_STD_ADDRESSOF -DASIO_HAS_STD_ARRAY -DASIO_HAS_STD_ATOMIC -DASIO_HAS_CSTDINT -DASIO_HAS_STD_SHARED_PTR -DASIO_HAS_STD_TYPE_TRAITS -DASIO_HAS_VARIADIC_TEMPLATES -DASIO_HAS_STD_FUNCTION -DASIO_HAS_STD_CHRONO -DASIO_HAS_STD_SYSTEM_ERROR)
+endif ()
+
+# Mac OS 10.7 and later deprecates most of the methods in OpenSSL.
+# Add -Wno-deprecated-declarations to avoid the warnings.
+if(APPLE)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -Wno-deprecated-declarations -Wno-unused-local-typedef")
+endif()
+
+if(DOXYGEN_FOUND)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/doc/Doxyfile @ONLY)
+add_custom_target(doc ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/doc/Doxyfile
+                  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+                  COMMENT "Generating API documentation with Doxygen" VERBATIM)
+endif(DOXYGEN_FOUND)
+
+
+# Copy files from the hadoop tree into the output/extern directory if
+#    they've changed
+function (copy_on_demand input_src_glob input_dest_dir)
+  get_filename_component(src_glob ${input_src_glob} REALPATH)
+  get_filename_component(dest_dir ${input_dest_dir} REALPATH)
+  get_filename_component(src_dir ${src_glob} PATH)
+  message(STATUS "Syncing ${src_glob} to ${dest_dir}")
+
+  file(GLOB_RECURSE src_files ${src_glob})
+  foreach(src_path ${src_files})
+    file(RELATIVE_PATH relative_src ${src_dir} ${src_path})
+    set(dest_path "${dest_dir}/${relative_src}")
+    add_custom_command(TARGET copy_hadoop_files
+     COMMAND ${CMAKE_COMMAND} -E copy_if_different "${src_path}" "${dest_path}"
+    )
+  endforeach()
+endfunction()
+
+# If we're building in the hadoop tree, pull the Hadoop files that
+#     libhdfspp depends on.  This allows us to ensure that
+#     the distribution will have a consistent set of headers and
+#     .proto files
+if(HADOOP_BUILD)
+    set(HADOOP_IMPORT_DIR ${PROJECT_BINARY_DIR}/extern)
+    get_filename_component(HADOOP_IMPORT_DIR ${HADOOP_IMPORT_DIR} REALPATH)
+
+  add_custom_target(copy_hadoop_files ALL)
+
+  # Gather the Hadoop files and resources that libhdfs++ needs to build
+  copy_on_demand(../libhdfs/include/*.h* ${HADOOP_IMPORT_DIR}/include)
+  copy_on_demand(${CMAKE_CURRENT_LIST_DIR}/../../../../../hadoop-hdfs-client/src/main/proto/*.proto ${HADOOP_IMPORT_DIR}/proto/hdfs)
+  copy_on_demand(${CMAKE_CURRENT_LIST_DIR}/../../../../../../hadoop-common-project/hadoop-common/src/main/proto/*.proto  ${HADOOP_IMPORT_DIR}/proto/hadoop)
+  copy_on_demand(${CMAKE_CURRENT_LIST_DIR}/../../../../../../hadoop-common-project/hadoop-common/src/test/proto/*.proto  ${HADOOP_IMPORT_DIR}/proto/hadoop_test)
+else(HADOOP_BUILD)
+  set(HADOOP_IMPORT_DIR ${CMAKE_CURRENT_LIST_DIR}/extern)
+endif(HADOOP_BUILD)
+
+# Paths to find the imported files
+set(PROTO_HDFS_DIR         ${HADOOP_IMPORT_DIR}/proto/hdfs)
+set(PROTO_HADOOP_DIR       ${HADOOP_IMPORT_DIR}/proto/hadoop)
+set(PROTO_HADOOP_TEST_DIR  ${HADOOP_IMPORT_DIR}/proto/hadoop_test)
+
+include_directories(
+  include
+  lib
+  ${HADOOP_IMPORT_DIR}/include
+)
+
+include_directories( SYSTEM
+  ${PROJECT_BINARY_DIR}/lib/proto
+  third_party/asio-1.10.2/include
+  third_party/rapidxml-1.13
+  third_party/gmock-1.7.0
+  third_party/tr2
+  third_party/protobuf
+  third_party/uriparser2
+  ${OPENSSL_INCLUDE_DIR}
+  ${SASL_INCLUDE_DIR}
+  ${PROTOBUF_INCLUDE_DIRS}
+)
+
+
+add_subdirectory(third_party/gmock-1.7.0)
+add_subdirectory(third_party/uriparser2)
+add_subdirectory(lib)
+if(NOT HDFSPP_LIBRARY_ONLY)
+    add_subdirectory(tests)
+    add_subdirectory(examples)
+    add_subdirectory(tools)
+endif()
+
+# create an empty file; hadoop_add_dual_library wraps add_library which
+# requires at least one file as an argument
+set(EMPTY_FILE_CC ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/empty.cc)
+file(WRITE ${EMPTY_FILE_CC} "")
+
+# Build the output libraries
+if(NEED_LINK_DL)
+   set(LIB_DL dl)
+endif()
+
+set(LIBHDFSPP_VERSION "0.1.0")
+set(LIBHDFSPP_ALL_OBJECTS $<TARGET_OBJECTS:bindings_c_obj> $<TARGET_OBJECTS:fs_obj> $<TARGET_OBJECTS:rpc_obj> $<TARGET_OBJECTS:reader_obj> $<TARGET_OBJECTS:proto_obj> $<TARGET_OBJECTS:connection_obj> $<TARGET_OBJECTS:common_obj> $<TARGET_OBJECTS:uriparser2_obj>)
+if (HADOOP_BUILD)
+  hadoop_add_dual_library(hdfspp ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS})
+  hadoop_target_link_dual_libraries(hdfspp
+    ${LIB_DL}
+    ${PROTOBUF_LIBRARY}
+    ${OPENSSL_LIBRARIES}
+    ${SASL_LIBRARIES}
+    ${CMAKE_THREAD_LIBS_INIT}
+  )
+  set_target_properties(hdfspp PROPERTIES SOVERSION ${LIBHDFSPP_VERSION})
+else (HADOOP_BUILD)
+  add_library(hdfspp_static STATIC ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS})
+  target_link_libraries(hdfspp_static
+    ${LIB_DL}
+    ${PROTOBUF_LIBRARY}
+    ${OPENSSL_LIBRARIES}
+    ${SASL_LIBRARIES}
+    ${CMAKE_THREAD_LIBS_INIT}
+    )
+  if(BUILD_SHARED_HDFSPP)
+    add_library(hdfspp SHARED ${EMPTY_FILE_CC} ${LIBHDFSPP_ALL_OBJECTS})
+    set_target_properties(hdfspp PROPERTIES SOVERSION ${LIBHDFSPP_VERSION})
+  endif(BUILD_SHARED_HDFSPP)
+endif (HADOOP_BUILD)
+
+# Set up make install targets
+# Can be installed to a particular location via "make DESTDIR=... install"
+file(GLOB_RECURSE LIBHDFSPP_HEADER_FILES "${CMAKE_CURRENT_LIST_DIR}/include/*.h*")
+file(GLOB_RECURSE LIBHDFS_HEADER_FILES "${HADOOP_IMPORT_DIR}/include/*.h*")
+install(FILES ${LIBHDFSPP_HEADER_FILES} DESTINATION include/hdfspp)
+install(FILES ${LIBHDFS_HEADER_FILES} DESTINATION include/hdfs)
+
+install(TARGETS hdfspp_static ARCHIVE DESTINATION lib)
+if(BUILD_SHARED_HDFSPP)
+  install(TARGETS hdfspp LIBRARY DESTINATION lib)
+endif(BUILD_SHARED_HDFSPP)
+
+add_custom_target(
+    InstallToBuildDirectory
+    COMMAND "${CMAKE_MAKE_PROGRAM}" install DESTDIR=${PROJECT_BINARY_DIR}/output
+)
+set(LIBHDFSPP_DIR ${PROJECT_BINARY_DIR}/output)

+ 161 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/CONTRIBUTING.md

@@ -0,0 +1,161 @@
+<!---
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+Libhdfs++ Coding Standards
+==========================
+
+* Libhdfs++ Coding Standards
+    * Introduction
+    * Automated Formatting
+    * Explicit Block Scopes
+    * Comments
+    * Portability
+
+
+Introduction
+------------
+
+The foundation of the libhdfs++ project's coding standards
+is Google's C++ style guide. It can be found here:
+
+<a href="https://google.github.io/styleguide/cppguide.html">https://google.github.io/styleguide/cppguide.html</a>
+
+There are several small restrictions, adopted from Sun's Java
+standards and Hadoop convention, that must be followed on top of
+Google's guide, as well as the portability requirements below.
+
+Automated Formatting
+--------------------
+
+Prior to submitting a patch for code review, use LLVM's formatting tool, clang-format, on the .h, .c, and .cc files included in the patch.  Use the -style=google switch when doing so.
+
+Example presubmission usage:
+
+``` shell
+cat my_source_file.cc | clang-format -style=google > temp_file.cc
+#optionally diff the source and temp file to get an idea what changed
+mv temp_file.cc my_source_file.cc
+```
+
+* note: On some Linux distributions clang-format already exists in the repositories but doesn't show up without an appended version number.  On Ubuntu you'll find it with:
+``` shell
+   "apt-get install clang-format-3.6"
+```
+
+Explicit Block Scopes
+---------------------
+
+Always add braces to conditional and loop bodies, even if the body could fit on a single line.
+
+__BAD__:
+``` c
+if (foo)
+  Bar();
+
+if (foo)
+  Bar();
+else
+  Baz();
+
+for (int i=0; i<10; i++)
+  Bar(i);
+```
+__GOOD__:
+``` c
+if (foo) {
+  Bar();
+}
+
+if (foo) {
+  Bar();
+} else {
+  Baz();
+}
+
+for (int i=0; i<10; i++) {
+  Bar(i);
+}
+```
+
+Comments
+--------
+
+Use the /\* comment \*/ style to maintain consistency with the rest of the Hadoop code base.
+
+__BAD__:
+``` c
+//this is a bad single line comment
+/*
+  this is a bad block comment
+*/
+```
+__GOOD__:
+``` c
+/* this is a single line comment */
+
+/**
+ * This is a block comment.  Note that nothing is on the first
+ * line of the block.
+ **/
+```
+
+Portability
+-----------
+
+Please make sure you write code that is portable.
+
+* All code must be able to build using GCC and LLVM.
+    * In the future we hope to support other compilers as well.
+* Don't make assumptions about endianness or architecture.
+    * Don't do clever things with pointers or intrinsics.
+* Don't write code that could force a non-aligned word access.
+    * This causes performance issues on most architectures and isn't supported at all on some.
+    * Generally the compiler will prevent this unless you are doing clever things with pointers, e.g. abusing placement new or reinterpreting a pointer into a pointer to a wider type (see the sketch at the end of this section).
+* If a type needs to be a specific width, make sure to specify it.
+    * `int32_t my_32_bit_wide_int`
+* Avoid using compiler dependent pragmas or attributes.
+    * If there is a justified and unavoidable reason for using these you must document why. See examples below.
+
+__BAD__:
+``` c
+struct Foo {
+  int32_t x_;
+  char y_;
+  int32_t z_;
+  char w_;
+} __attribute__((packed));
+/**
+ * "I didn't profile and identify that this is causing
+ * significant memory overhead but I want to pack it to
+ * save 6 bytes"
+ **/
+```
+__NECESSARY__: Still not good but required for short-circuit reads.
+``` c
+struct FileDescriptorMessage {
+  struct cmsghdr msg_;
+  int file_descriptors_[2];
+} __attribute__((packed));
+/**
+ * This is actually needed for short circuit reads.
+ * "struct cmsghdr" is well defined on UNIX systems.
+ * This mechanism relies on the fact that any passed
+ * ancillary data directly follows the cmsghdr.
+ * The kernel interprets any padding as real data.
+ **/
+```
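
As an illustrative sketch of the non-aligned access pitfall above (added for clarity, not part of the original guidance): reinterpreting a byte pointer as a pointer to a wider type can force an unaligned load, while copying through `memcpy` stays portable.

__BAD__:
``` c
#include <stdint.h>

int32_t read_i32(const char *buf) {
  /* buf + 1 is not guaranteed to be 4-byte aligned; this may trap on some
   * architectures and is slow on others (it also violates strict aliasing). */
  return *(const int32_t *)(buf + 1);
}
```
__GOOD__:
``` c
#include <stdint.h>
#include <string.h>

int32_t read_i32(const char *buf) {
  int32_t v;
  /* memcpy lets the compiler pick a load that is safe for the target. */
  memcpy(&v, buf + 1, sizeof(v));
  return v;
}
```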

+ 35 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/Doxyfile.in

@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+DOXYFILE_ENCODING      = UTF-8
+PROJECT_NAME           = "libhdfspp"
+OUTPUT_DIRECTORY       = doc
+TAB_SIZE               = 2
+MARKDOWN_SUPPORT       = YES
+BUILTIN_STL_SUPPORT    = YES
+
+
+INPUT                  = @PROJECT_SOURCE_DIR@/doc/mainpage.dox \
+                         @PROJECT_SOURCE_DIR@/include/libhdfspp \
+                         @PROJECT_SOURCE_DIR@/lib/common/continuation \
+
+INPUT_ENCODING         = UTF-8
+RECURSIVE              = NO
+
+GENERATE_HTML          = YES
+GENERATE_LATEX         = NO

+ 5 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/InvalidAllocationTagException.java → hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/doc/mainpage.dox

@@ -16,19 +16,11 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.yarn.exceptions;
-
 /**
- * This exception is thrown by
- * {@link
- * org.apache.hadoop.yarn.api.records.AllocationTagNamespace#parse(String)}
- * when it fails to parse a namespace.
- */
-public class InvalidAllocationTagException extends YarnException {
+\mainpage libhdfs++
 
-  private static final long serialVersionUID = 1L;
+libhdfs++ is a modern implementation of the HDFS client in C++11. It is
+optimized for Massively Parallel Processing (MPP) applications that
+access thousands of files concurrently in HDFS.
 
-  public InvalidAllocationTagException(String message) {
-    super(message);
-  }
-}
+*/

+ 20 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/CMakeLists.txt

@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(c)
+add_subdirectory(cc)

+ 20 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/CMakeLists.txt

@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(cat)
+add_subdirectory(connect_cancel)

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(cat_c cat.c)
+target_link_libraries(cat_c hdfspp_static uriparser2)

+ 121 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/cat/cat.c

@@ -0,0 +1,121 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/*
+  A stripped-down version of Unix's "cat".
+  Doesn't deal with any flags for now; it will just attempt to read the whole file.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "hdfspp/hdfs_ext.h"
+#include "uriparser2/uriparser2.h"
+#include "common/util_c.h"
+
+#define SCHEME "hdfs"
+#define BUF_SIZE 1048576 //1 MB
+static char input_buffer[BUF_SIZE];
+
+int main(int argc, char** argv) {
+
+  char error_text[1024];
+  if (argc != 2) {
+    fprintf(stderr, "usage: cat [hdfs://[<hostname>:<port>]]/<path-to-file>\n");
+    return 1;
+  }
+
+  URI * uri = NULL;
+  const char * uri_path = argv[1];
+
+  //Separate check for scheme is required, otherwise uriparser2.h library causes memory issues under valgrind
+  const char * scheme_end = strstr(uri_path, "://");
+  if (scheme_end) {
+    if (strncmp(uri_path, SCHEME, strlen(SCHEME)) != 0) {
+      fprintf(stderr, "Scheme %.*s:// is not supported.\n", (int) (scheme_end - uri_path), uri_path);
+      return 1;
+    } else {
+      uri = uri_parse(uri_path);
+    }
+  }
+  if (!uri) {
+    fprintf(stderr, "Malformed URI: %s\n", uri_path);
+    return 1;
+  }
+
+  struct hdfsBuilder* builder = hdfsNewBuilder();
+  if (uri->host)
+    hdfsBuilderSetNameNode(builder, uri->host);
+  if (uri->port != 0)
+    hdfsBuilderSetNameNodePort(builder, uri->port);
+
+  hdfsFS fs = hdfsBuilderConnect(builder);
+  if (fs == NULL) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    const char * host = uri->host ? uri->host : "<default>";
+    int port = uri->port;
+    if (port == 0)
+      port = 8020;
+    fprintf(stderr, "Unable to connect to %s:%d, hdfsConnect returned null.\n%s\n",
+            host, port, error_text);
+    return 1;
+  }
+
+  hdfsFile file = hdfsOpenFile(fs, uri->path, 0, 0, 0, 0);
+  if (NULL == file) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    fprintf(stderr, "Unable to open file %s: %s\n", uri->path, error_text );
+    hdfsDisconnect(fs);
+    hdfsFreeBuilder(builder);
+    return 1;
+  }
+
+  ssize_t read_bytes_count = 0;
+  ssize_t last_read_bytes = 0;
+
+  while (0 < (last_read_bytes =
+                  hdfsPread(fs, file, read_bytes_count, input_buffer, sizeof(input_buffer)))) {
+    fwrite(input_buffer, last_read_bytes, 1, stdout);
+    read_bytes_count += last_read_bytes;
+  }
+
+  int res = 0;
+  res = hdfsCloseFile(fs, file);
+  if (0 != res) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    fprintf(stderr, "Error closing file: %s\n", error_text);
+    hdfsDisconnect(fs);
+    hdfsFreeBuilder(builder);
+    return 1;
+  }
+
+  res = hdfsDisconnect(fs);
+  if (0 != res) {
+    hdfsGetLastError(error_text, sizeof(error_text));
+    fprintf(stderr, "Error disconnecting filesystem: %s", error_text);
+    hdfsFreeBuilder(builder);
+    return 1;
+  }
+
+  hdfsFreeBuilder(builder);
+  free(uri);
+  // Clean up static data and prevent valgrind memory leaks
+  ShutdownProtobufLibrary_C();
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(connect_cancel_c connect_cancel.c)
+target_link_libraries(connect_cancel_c hdfspp_static uriparser2)

+ 107 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/c/connect_cancel/connect_cancel.c

@@ -0,0 +1,107 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/*
+  Attempt to connect to a cluster and use Control-C to bail out if it takes a while.
+  Valid config must be in environment variable $HADOOP_CONF_DIR
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "hdfspp/hdfs_ext.h"
+#include "common/util_c.h"
+
+#define ERROR_BUFFER_SIZE 1024
+
+// Global so signal handler can get at it
+hdfsFS fs = NULL;
+
+const char *catch_enter  = "In signal handler, going to try and cancel.\n";
+const char *catch_cancel = "hdfsCancelPendingConnection has been called from the signal handler.\n";
+const char *catch_exit   = "Exiting the signal handler.\n";
+
+// Print to stdout without calling malloc or otherwise indirectly modifying userspace state.
+// Write calls to stdout may still interleave with stuff coming from elsewhere.
+static void sighandler_direct_stdout(const char *msg) {
+  if(!msg)
+    return;
+  ssize_t res = write(1 /*posix stdout fd*/, msg, strlen(msg));
+  (void)res;
+}
+
+static void sig_catch(int val) {
+  // Beware of calling things that aren't reentrant e.g. malloc while in a signal handler.
+  sighandler_direct_stdout(catch_enter);
+
+  if(fs) {
+    hdfsCancelPendingConnection(fs);
+    sighandler_direct_stdout(catch_cancel);
+  }
+  sighandler_direct_stdout(catch_exit);
+}
+
+
+int main(int argc, char** argv) {
+  hdfsSetLoggingLevel(HDFSPP_LOG_LEVEL_INFO);
+  signal(SIGINT, sig_catch);
+
+  char error_text[ERROR_BUFFER_SIZE];
+  if (argc != 1) {
+    fprintf(stderr, "usage: ./connect_cancel_c\n");
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  const char *hdfsconfdir = getenv("HADOOP_CONF_DIR");
+  if(!hdfsconfdir) {
+    fprintf(stderr, "$HADOOP_CONF_DIR must be set\n");
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  struct hdfsBuilder* builder = hdfsNewBuilderFromDirectory(hdfsconfdir);
+
+  fs = hdfsAllocateFileSystem(builder);
+  if (fs == NULL) {
+    hdfsGetLastError(error_text, ERROR_BUFFER_SIZE);
+    fprintf(stderr, "hdfsAllocateFileSystem returned null.\n%s\n", error_text);
+    hdfsFreeBuilder(builder);
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  int connected = hdfsConnectAllocated(fs, builder);
+  if (connected != 0) {
+    hdfsGetLastError(error_text, ERROR_BUFFER_SIZE);
+    fprintf(stderr, "hdfsConnectAllocated errored.\n%s\n", error_text);
+    hdfsFreeBuilder(builder);
+    ShutdownProtobufLibrary_C();
+    exit(EXIT_FAILURE);
+  }
+
+  hdfsDisconnect(fs);
+  hdfsFreeBuilder(builder);
+  // Clean up static data and prevent valgrind memory leaks
+  ShutdownProtobufLibrary_C();
+  return 0;
+}

+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/CMakeLists.txt

@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+include_directories( ../../tools )
+
+add_subdirectory(cat)
+add_subdirectory(gendirs)
+add_subdirectory(find)
+add_subdirectory(connect_cancel)

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(cat cat.cc)
+target_link_libraries(cat tools_common hdfspp_static)

+ 89 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/cat/cat.cc

@@ -0,0 +1,89 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/**
+   * Unix-like cat tool example.
+   *
+   * Reads the specified file from HDFS and outputs to stdout.
+   *
+   * Usage: cat /<path-to-file>
+   *
+   * Example: cat /dir/file
+   *
+   * @param path-to-file    Absolute path to the file to read.
+   *
+   **/
+
+#include "hdfspp/hdfspp.h"
+#include <google/protobuf/stubs/common.h>
+#include "tools_common.h"
+
+const std::size_t BUF_SIZE = 1048576; //1 MB
+static char input_buffer[BUF_SIZE];
+
+int main(int argc, char *argv[]) {
+  if (argc != 2) {
+    std::cerr << "usage: cat /<path-to-file>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  std::string path = argv[1];
+
+  //Building a URI object from the given uri path
+  hdfs::URI uri = hdfs::parse_path_or_exit(path);
+
+  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, false);
+  if (!fs) {
+    std::cerr << "Could not connect the file system. " << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  hdfs::FileHandle *file_raw = nullptr;
+  hdfs::Status status = fs->Open(path, &file_raw);
+  if (!status.ok()) {
+    std::cerr << "Could not open file " << path << ". " << status.ToString() << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  //wrapping file_raw into a unique pointer to guarantee deletion
+  std::unique_ptr<hdfs::FileHandle> file(file_raw);
+
+  ssize_t total_bytes_read = 0;
+  size_t last_bytes_read = 0;
+
+  do{
+    //Reading file chunks
+    status = file->Read(input_buffer, sizeof(input_buffer), &last_bytes_read);
+    if(status.ok()) {
+      //Writing file chunks to stdout
+      fwrite(input_buffer, last_bytes_read, 1, stdout);
+      total_bytes_read += last_bytes_read;
+    } else {
+      if(status.is_invalid_offset()){
+        //Reached the end of the file
+        break;
+      } else {
+        std::cerr << "Error reading the file: " << status.ToString() << std::endl;
+        exit(EXIT_FAILURE);
+      }
+    }
+  } while (last_bytes_read > 0);
+
+  // Clean up static data and prevent valgrind memory leaks
+  google::protobuf::ShutdownProtobufLibrary();
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR CACHE STRING ${CMAKE_INSTALL_PREFIX})
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(connect_cancel connect_cancel.cc)
+target_link_libraries(connect_cancel hdfspp_static)

+ 154 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/connect_cancel/connect_cancel.cc

@@ -0,0 +1,154 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+
+#include "hdfspp/hdfspp.h"
+#include "common/hdfs_configuration.h"
+#include "common/configuration_loader.h"
+
+#include <google/protobuf/stubs/common.h>
+
+#include <signal.h>
+#include <unistd.h>
+
+#include <thread>
+#include <iostream>
+
+// Simple example of how to cancel an async connect call.
+// Here Control-C (SIGINT) is caught in order to invoke the FS level cancel and
+// properly tear down the process.  Valgrind should show no leaked memory on exit
+// when cancel has been called.  URI parsing code is omitted and defaultFs from
+// /etc/hadoop/conf or $HADOOP_CONF_DIR is always used.
+
+// Scoped globally to make it simple to reference from the signal handler.
+std::shared_ptr<hdfs::FileSystem> fs;
+
+const std::string catch_enter("In signal handler, going to try and cancel FileSystem::Connect.\n");
+const std::string catch_cancel("FileSystem::CancelPendingConnect has been called from the signal handler.\n");
+const std::string catch_exit("Exiting the signal handler.\n");
+
+// Avoid IO reentrancy issues, see comments in signal handler below.
+// It's possible that the write interleaves with another write call,
+// but it won't corrupt the stack or heap.
+static void sighandler_direct_stdout(const std::string &msg) {
+  ssize_t res = ::write(1 /*posix stdout FD*/, msg.data(), msg.size());
+  // In production you'd want to check res, but error handling code will
+  // need to be fairly application specific if it's going to properly
+  // avoid reentrant calls to malloc.
+  (void)res;
+}
+
+// Signal handler to make a SIGINT call cancel rather than exit().
+static void sig_catch(int val) {
+  (void)val;
+  // This is avoiding the tricky bits of signal handling, notably that the
+  // underlying string manipulation and IO functions used by the logger
+  // are unlikely to be reentrant.
+  //
+  // Production code could mask out all logging on handler entry and enable
+  // it again on exit; here we just assume it's "good enough" and some
+  // (possibly broken) log messages are better than none.
+
+  sighandler_direct_stdout(catch_enter);
+  if(fs) {
+    // This will invoke the callback immediately with an OperationCanceled status
+    fs->CancelPendingConnect();
+    sighandler_direct_stdout(catch_cancel);
+  }
+  sighandler_direct_stdout(catch_exit);
+}
+
+
+int main(int arg_token_count, const char **args) {
+  (void)args;
+  if(arg_token_count != 1) {
+    std::cerr << "usage: ./connect_cancel";
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  // Register a signal handler to asynchronously invoke cancel from outside the main thread.
+  signal(SIGINT, sig_catch);
+
+  // Generic setup/config code much like the other examples.
+  hdfs::Options options;
+  //Setting the config path to the default: "$HADOOP_CONF_DIR" or "/etc/hadoop/conf"
+  hdfs::ConfigurationLoader loader;
+  //Loading default config files core-site.xml and hdfs-site.xml from the config path
+  hdfs::optional<hdfs::HdfsConfiguration> config = loader.LoadDefaultResources<hdfs::HdfsConfiguration>();
+  //TODO: HDFS-9539 - after this is resolved, valid config will always be returned.
+  if(config){
+    //Loading options from the config
+    options = config->GetOptions();
+  }
+
+
+  // Start an IoService and some worker threads
+  std::shared_ptr<hdfs::IoService> service = hdfs::IoService::MakeShared();
+  if(nullptr == service) {
+    std::cerr << "Unable to create IoService" << std::endl;
+    fs.reset();
+    // Nasty hack to clean up for valgrind since we don't have the C++17 optional<T>::reset method
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  unsigned int worker_count = service->InitDefaultWorkers();
+  if(worker_count < 1) {
+    std::cerr << "Unable to create IoService worker threads";
+    fs.reset();
+    service->Stop();
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  // Set up and connect to the FileSystem
+  fs.reset(hdfs::FileSystem::New(service, "", options));
+  if(nullptr == fs) {
+    std::cerr << "Unable to create FileSystem" << std::endl;
+    fs.reset();
+    service->Stop();
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  hdfs::Status status = fs->ConnectToDefaultFs();
+  if (!status.ok()) {
+    if(!options.defaultFS.get_host().empty()){
+      std::cerr << "Error connecting to " << options.defaultFS << ". " << status.ToString() << std::endl;
+    } else {
+      std::cerr << "Error connecting to the cluster: defaultFS is empty. " << status.ToString() << std::endl;
+    }
+    fs.reset();
+    service->Stop();
+    config = decltype(config)();
+    google::protobuf::ShutdownProtobufLibrary();
+    exit(EXIT_FAILURE);
+  }
+
+  fs.reset();
+  service->Stop();
+  config = decltype(config)();
+  google::protobuf::ShutdownProtobufLibrary();
+
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR ${CMAKE_INSTALL_PREFIX} CACHE STRING "Path to the libhdfs++ install root")
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(find find.cc)
+target_link_libraries(find tools_common hdfspp_static)

+ 140 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/find/find.cc

@@ -0,0 +1,140 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/**
+   * A parallel find tool example.
+   *
+   * Finds all files matching the specified name recursively starting from the
+   * specified directory and prints their filepaths. Works either synchronously
+   * or asynchronously.
+   *
+   * Usage: find /<path-to-file> <file-name> <use_async>
+   *
+   * Example: find /dir?/tree* some?file*name 1
+   *
+   * @param path-to-file    Absolute path at which to begin search, can have wild
+   *                        cards and must be non-blank
+   * @param file-name       Name to find, can have wild cards and must be non-blank
+   * @param use_async       If set to 1 it prints out results asynchronously as
+   *                        they arrive. If set to 0 results are printed in one
+   *                        big chunk when it becomes available.
+   *
+   **/
+
+#include "hdfspp/hdfspp.h"
+#include <google/protobuf/stubs/common.h>
+#include <future>
+#include "tools_common.h"
+
+void SyncFind(std::shared_ptr<hdfs::FileSystem> fs, const std::string &path, const std::string &name){
+  std::vector<hdfs::StatInfo> results;
+  //Synchronous call to Find
+  hdfs::Status stat = fs->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), &results);
+
+  if (!stat.ok()) {
+    std::cerr << "Error: " << stat.ToString() << std::endl;
+  }
+
+  if(results.empty()){
+    std::cout << "Nothing Found" << std::endl;
+  } else {
+    //Printing out the results
+    for (hdfs::StatInfo const& si : results) {
+      std::cout << si.full_path << std::endl;
+    }
+  }
+}
+
+void AsyncFind(std::shared_ptr<hdfs::FileSystem> fs, const std::string &path, const std::string &name){
+  std::promise<void> promise;
+  std::future<void> future(promise.get_future());
+  bool something_found = false;
+  hdfs::Status status = hdfs::Status::OK();
+
+  /**
+    * Keep requesting more until we get the entire listing. Set the promise
+    * when we have the entire listing to stop.
+    *
+    * Find guarantees that the handler will only be called once at a time,
+    * so we do not need any locking here
+    */
+  auto handler = [&promise, &status, &something_found]
+                  (const hdfs::Status &s, const std::vector<hdfs::StatInfo> & si, bool has_more_results) -> bool {
+    //Print result chunks as they arrive
+    if(!si.empty()) {
+      something_found = true;
+      for (hdfs::StatInfo const& s : si) {
+        std::cout << s.full_path << std::endl;
+      }
+    }
+    if(!s.ok() && status.ok()){
+      //We make sure we set 'status' only on the first error.
+      status = s;
+    }
+    if (!has_more_results) {
+      promise.set_value();  //set promise
+      return false;         //request stop sending results
+    }
+    return true;  //request more results
+  };
+
+  //Asynchronous call to Find
+  fs->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), handler);
+
+  //block until promise is set
+  future.get();
+  if(!status.ok()) {
+    std::cerr << "Error: " << status.ToString() << std::endl;
+  }
+  if(!something_found){
+    std::cout << "Nothing Found" << std::endl;
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc != 4) {
+    std::cerr << "usage: find /<path-to-file> <file-name> <use_async>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  std::string path = argv[1];
+  std::string name = argv[2];
+  bool use_async = (std::stoi(argv[3]) != 0);
+
+  //Building a URI object from the given uri path
+  hdfs::URI uri = hdfs::parse_path_or_exit(path);
+
+  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
+  if (!fs) {
+    std::cerr << "Could not connect the file system. " << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  if (use_async){
+    //Example of Async find
+    AsyncFind(fs, path, name);
+  } else {
+    //Example of Sync find
+    SyncFind(fs, path, name);
+  }
+
+  // Clean up static data and prevent valgrind memory leaks
+  google::protobuf::ShutdownProtobufLibrary();
+  return 0;
+}

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/CMakeLists.txt

@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default LIBHDFSPP_DIR to the default install location.  You can override
+#    it by adding -DLIBHDFSPP_DIR=... to your cmake invocation
+set(LIBHDFSPP_DIR ${CMAKE_INSTALL_PREFIX} CACHE STRING "Path to the libhdfs++ install root")
+
+include_directories( ${LIBHDFSPP_DIR}/include )
+link_directories( ${LIBHDFSPP_DIR}/lib )
+
+add_executable(gendirs gendirs.cc)
+target_link_libraries(gendirs tools_common hdfspp_static)

+ 122 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/examples/cc/gendirs/gendirs.cc

@@ -0,0 +1,122 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+/**
+   * A recursive directory generator tool.
+   *
+   * Generates a directory tree with specified depth and fanout starting from
+   * a given path. Generation is asynchronous.
+   *
+   * Usage:   gendirs /<path-to-dir> <depth> <fanout>
+   *
+   * Example: gendirs /dir0 3 10
+   *
+   * @param path-to-dir   Absolute path to the directory tree root where the
+   *                      directory tree will be generated
+   * @param depth         Depth of the directory tree (number of levels from
+   *                      root to leaves)
+   * @param fanout        Fanout of each directory (number of sub-directories to
+   *                      be created inside each directory except leaf directories)
+   *
+   **/
+
+#include "hdfspp/hdfspp.h"
+#include <google/protobuf/stubs/common.h>
+#include <future>
+#include "tools_common.h"
+
+#define DEFAULT_PERMISSIONS 0755
+
+void GenerateDirectories (std::shared_ptr<hdfs::FileSystem> fs, int depth, int level, int fanout, std::string path, std::vector<std::future<hdfs::Status>> & futures) {
+  //Level contains our current depth in the directory tree
+  if(level < depth) {
+    for(int i = 0; i < fanout; i++){
+      //Recursive calls to cover all possible paths from the root to the leaf nodes
+      GenerateDirectories(fs, depth, level+1, fanout, path + "dir" + std::to_string(i) + "/", futures);
+    }
+  } else {
+    //We have reached the leaf nodes and now start making calls to create directories
+    //We make a promise which will be set when the call finishes and executes our handler
+    auto callstate = std::make_shared<std::promise<hdfs::Status>>();
+    //Extract a future from this promise
+    std::future<hdfs::Status> future(callstate->get_future());
+    //Save this future to the vector of futures which will be used to wait on all promises
+    //after the whole recursion is done
+    futures.push_back(std::move(future));
+    //Create a handler that will be executed when Mkdirs is done
+    auto handler = [callstate](const hdfs::Status &s) {
+      callstate->set_value(s);
+    };
+    //Asynchronous call to create this directory along with all missing parent directories
+    fs->Mkdirs(path, DEFAULT_PERMISSIONS, true, handler);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc != 4) {
+    std::cerr << "usage: gendirs /<path-to-dir> <depth> <fanout>" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  std::string path = argv[1];
+  int depth = std::stoi(argv[2]);
+  int fanout = std::stoi(argv[3]);
+
+  //Building a URI object from the given uri path
+  hdfs::URI uri = hdfs::parse_path_or_exit(path);
+
+  std::shared_ptr<hdfs::FileSystem> fs = hdfs::doConnect(uri, true);
+  if (!fs) {
+    std::cerr << "Could not connect the file system. " << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  /**
+   * We do not want the recursion to block on anything, therefore we will be
+   * making asynchronous calls recursively, and then just waiting for all
+   * the calls to finish.
+   *
+   * This array of futures will be populated by the recursive function below.
+   * Each new asynchronous Mkdirs call will add a future to this vector, and will
+   * create a promise, which will only be set when the call was completed and
+   * processed. After the whole recursion is complete we will need to wait until
+   * all promises are set before we can exit.
+   **/
+  std::vector<std::future<hdfs::Status>> futures;
+
+  GenerateDirectories(fs, depth, 0, fanout, path + "/", futures);
+
+  /**
+   * We are waiting here until all promises are set, and checking whether
+   * the returned statuses contained any errors.
+   **/
+  for(std::future<hdfs::Status> &f : futures){
+    hdfs::Status status = f.get();
+    if (!status.ok()) {
+      std::cerr << "Error: " << status.ToString() << std::endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  std::cout << "All done!" << std::endl;
+
+  // Clean up static data and prevent valgrind memory leaks
+  google::protobuf::ShutdownProtobufLibrary();
+  return 0;
+}

+ 177 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/block_location.h

@@ -0,0 +1,177 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HDFSPP_BLOCK_LOCATION_H
+#define HDFSPP_BLOCK_LOCATION_H
+
+namespace hdfs {
+
+class DNInfo {
+public:
+  DNInfo() : xfer_port_(-1), info_port_(-1), IPC_port_(-1), info_secure_port_(-1) {}
+
+  std::string getHostname() const {
+    return hostname_;
+  }
+
+  void setHostname(const std::string & hostname) {
+    this->hostname_ = hostname;
+  }
+
+  std::string getIPAddr() const {
+    return ip_addr_;
+  }
+
+  void setIPAddr(const std::string & ip_addr) {
+    this->ip_addr_ = ip_addr;
+  }
+
+  std::string getNetworkLocation() const {
+    return network_location_;
+  }
+
+  void setNetworkLocation(const std::string & location) {
+    this->network_location_ = location;
+  }
+
+  int getXferPort() const {
+    return xfer_port_;
+  }
+
+  void setXferPort(int xfer_port) {
+    this->xfer_port_ = xfer_port;
+  }
+
+  int getInfoPort() const {
+    return info_port_;
+  }
+
+  void setInfoPort(int info_port) {
+    this->info_port_ = info_port;
+  }
+
+  int getIPCPort() const {
+    return IPC_port_;
+  }
+
+  void setIPCPort(int IPC_port) {
+    this->IPC_port_ = IPC_port;
+  }
+
+  int getInfoSecurePort() const {
+    return info_secure_port_;
+  }
+
+  void setInfoSecurePort(int info_secure_port) {
+    this->info_secure_port_ = info_secure_port;
+  }
+private:
+  std::string hostname_;
+  std::string ip_addr_;
+  std::string network_location_;
+  int         xfer_port_;
+  int         info_port_;
+  int         IPC_port_;
+  int         info_secure_port_;
+};
+
+class BlockLocation {
+public:
+    bool isCorrupt() const {
+        return corrupt_;
+    }
+
+    void setCorrupt(bool corrupt) {
+        this->corrupt_ = corrupt;
+    }
+
+    int64_t getLength() const {
+        return length_;
+    }
+
+    void setLength(int64_t length) {
+        this->length_ = length;
+    }
+
+    int64_t getOffset() const {
+        return offset_;
+    }
+
+    void setOffset(int64_t offset) {
+        this->offset_ = offset;
+    }
+
+    const std::vector<DNInfo> & getDataNodes() const {
+        return dn_info_;
+    }
+
+    void setDataNodes(const std::vector<DNInfo> & dn_info) {
+        this->dn_info_ = dn_info;
+    }
+
+private:
+    bool corrupt_;
+    int64_t length_;
+    int64_t offset_;  // Offset of the block in the file
+    std::vector<DNInfo> dn_info_; // Info about who stores each block
+};
+
+class FileBlockLocation {
+public:
+  uint64_t getFileLength() {
+    return fileLength_;
+  }
+
+  void setFileLength(uint64_t fileLength) {
+    this->fileLength_ = fileLength;
+  }
+
+  bool isLastBlockComplete() const {
+    return this->lastBlockComplete_;
+  }
+
+  void setLastBlockComplete(bool lastBlockComplete) {
+    this->lastBlockComplete_ = lastBlockComplete;
+  }
+
+  bool isUnderConstruction() const {
+    return underConstruction_;
+  }
+
+  void setUnderConstruction(bool underConstruction) {
+    this->underConstruction_ = underConstruction;
+  }
+
+  const std::vector<BlockLocation> & getBlockLocations() const {
+    return blockLocations_;
+  }
+
+  void setBlockLocations(const std::vector<BlockLocation> & blockLocations) {
+    this->blockLocations_ = blockLocations;
+  }
+private:
+  uint64_t fileLength_;
+  bool     lastBlockComplete_;
+  bool     underConstruction_;
+  std::vector<BlockLocation> blockLocations_;
+};
+
+} // namespace hdfs
+
+
+#endif /* HDFSPP_BLOCK_LOCATION_H */
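The accessors above are consumed through FileSystem::GetBlockLocations, declared later in this changeset. A minimal sketch, assuming an already connected std::shared_ptr<hdfs::FileSystem> named fs and an illustrative path:

    // Sketch only: fs is assumed to be a connected std::shared_ptr<hdfs::FileSystem>.
    std::shared_ptr<hdfs::FileBlockLocation> locations;
    hdfs::Status status = fs->GetBlockLocations("/tmp/some_file", 0, 1024 * 1024, &locations);
    if (status.ok()) {
      for (const hdfs::BlockLocation &block : locations->getBlockLocations()) {
        std::cout << "block offset=" << block.getOffset()
                  << " length=" << block.getLength() << std::endl;
        for (const hdfs::DNInfo &dn : block.getDataNodes()) {
          std::cout << "  replica on " << dn.getHostname() << ":" << dn.getXferPort() << std::endl;
        }
      }
    }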

+ 68 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/config_parser.h

@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_CONFIGPARSER_H_
+#define LIBHDFSPP_CONFIGPARSER_H_
+
+#include "hdfspp/options.h"
+#include "hdfspp/uri.h"
+#include "hdfspp/status.h"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace hdfs {
+
+class ConfigParser {
+ public:
+  ConfigParser();
+  ConfigParser(const std::string& path);
+  ConfigParser(const std::vector<std::string>& configDirectories);
+  ~ConfigParser();
+  ConfigParser(ConfigParser&&);
+  ConfigParser& operator=(ConfigParser&&);
+
+  bool LoadDefaultResources();
+  std::vector<std::pair<std::string, Status> > ValidateResources() const;
+
+  // Return false if value couldn't be found or cast to desired type
+  bool get_int(const std::string& key, int& outval) const;
+  int get_int_or(const std::string& key, const int defaultval) const;
+
+  bool get_string(const std::string& key, std::string& outval) const;
+  std::string get_string_or(const std::string& key, const std::string& defaultval) const;
+
+  bool get_bool(const std::string& key, bool& outval) const;
+  bool get_bool_or(const std::string& key, const bool defaultval) const;
+
+  bool get_double(const std::string& key, double& outval) const;
+  double get_double_or(const std::string& key, const double defaultval) const;
+
+  bool get_uri(const std::string& key, URI& outval) const;
+  URI get_uri_or(const std::string& key, const URI& defaultval) const;
+
+  bool get_options(Options& outval) const;
+  Options get_options_or(const Options& defaultval) const;
+
+ private:
+  class impl;
+  std::unique_ptr<impl> pImpl;
+};
+
+}
+#endif
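A minimal usage sketch for this interface, assuming readable core-site.xml/hdfs-site.xml in the default configuration directories; the configuration key is illustrative only:

    #include "hdfspp/config_parser.h"
    #include <iostream>

    int main() {
      hdfs::ConfigParser parser;   // default search path ($HADOOP_CONF_DIR, then /etc/hadoop/conf)
      if (!parser.LoadDefaultResources()) {
        std::cerr << "Could not load default config resources" << std::endl;
        return 1;
      }
      // Missing or malformed keys fall back to the supplied defaults.
      int retries = parser.get_int_or("dfs.client.retry.max.attempts", 10);
      hdfs::Options options = parser.get_options_or(hdfs::Options());
      std::cout << "retries=" << retries << std::endl;
      (void)options;
      return 0;
    }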

+ 48 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/content_summary.h

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HDFSPP_CONTENT_SUMMARY_H_
+#define HDFSPP_CONTENT_SUMMARY_H_
+
+#include <string>
+
+namespace hdfs {
+
+/**
+ * Content summary is assumed to be unchanging for the duration of the operation
+ */
+struct ContentSummary {
+  uint64_t length;
+  uint64_t filecount;
+  uint64_t directorycount;
+  uint64_t quota;
+  uint64_t spaceconsumed;
+  uint64_t spacequota;
+  std::string path;
+
+  ContentSummary();
+
+  //Converts ContentSummary object to std::string (hdfs_count format)
+  std::string str(bool include_quota) const;
+
+  //Converts ContentSummary object to std::string (hdfs_du format)
+  std::string str_du() const;
+};
+
+}
+
+#endif
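For context, a sketch of how this struct gets filled in via FileSystem::GetContentSummary, declared later in this changeset; fs is assumed to be a connected std::shared_ptr<hdfs::FileSystem> and the path is illustrative:

    hdfs::ContentSummary summary;
    hdfs::Status status = fs->GetContentSummary("/tmp", summary);
    if (status.ok()) {
      // Same layout as the hdfs_count tool, including the quota columns.
      std::cout << summary.str(/*include_quota=*/true) << std::endl;
    }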

+ 141 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/events.h

@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HDFSPP_EVENTS
+#define HDFSPP_EVENTS
+
+#include "hdfspp/status.h"
+
+#include <functional>
+
+namespace hdfs {
+
+/*
+ * Supported event names.  These names will stay consistent in libhdfs callbacks.
+ *
+ * Other events not listed here may be seen, but they are not stable and
+ * should not be counted on.  May need to be broken up into more components
+ * as more events are added.
+ */
+
+static constexpr const char * FS_NN_CONNECT_EVENT = "NN::connect";
+static constexpr const char * FS_NN_READ_EVENT = "NN::read";
+static constexpr const char * FS_NN_WRITE_EVENT = "NN::write";
+
+static constexpr const char * FILE_DN_CONNECT_EVENT = "DN::connect";
+static constexpr const char * FILE_DN_READ_EVENT = "DN::read";
+static constexpr const char * FILE_DN_WRITE_EVENT = "DN::write";
+
+
+// NN failover event due to issues with the current NN; might be standby, might be dead.
+// Invokes the fs_event_callback using the nameservice name in the cluster string.
+// The uint64_t value argument holds an address that can be reinterpreted as a const char *
+// and provides the full URI of the node the failover will attempt to connect to next.
+static constexpr const char * FS_NN_FAILOVER_EVENT = "NN::failover";
+
+// Invoked when RpcConnection tries to use an empty set of endpoints to figure out
+// which NN in a HA cluster to connect to.
+static constexpr const char * FS_NN_EMPTY_ENDPOINTS_EVENT = "NN::bad_failover::no_endpoints";
+
+// Invoked prior to determining if failed NN rpc calls should be retried or discarded.
+static constexpr const char * FS_NN_PRE_RPC_RETRY_EVENT = "NN::rpc::get_retry_action";
+
+class event_response {
+public:
+  // Helper factories
+  // The default ok response; libhdfspp should continue normally
+  static event_response make_ok() {
+    return event_response(kOk);
+  }
+  static event_response make_caught_std_exception(const char *what) {
+    return event_response(kCaughtStdException, what);
+  }
+  static event_response make_caught_unknown_exception() {
+    return event_response(kCaughtUnknownException);
+  }
+
+  // High level classification of responses
+  enum event_response_type {
+    kOk = 0,
+    // User supplied callback threw.
+    // Std exceptions will copy the what() string
+    kCaughtStdException = 1,
+    kCaughtUnknownException = 2,
+
+    // Responses to be used in testing only
+    kTest_Error = 100
+  };
+
+  event_response_type response_type() { return response_type_; }
+
+private:
+  // Use factories to construct for now
+  event_response();
+  event_response(event_response_type type)
+            : response_type_(type)
+  {
+    if(type == kCaughtUnknownException) {
+      status_ = Status::Exception("c++ unknown exception", "");
+    }
+  }
+  event_response(event_response_type type, const char *what)
+            : response_type_(type),
+              exception_msg_(what==nullptr ? "" : what)
+  {
+    status_ = Status::Exception("c++ std::exception", exception_msg_.c_str());
+  }
+
+
+  event_response_type response_type_;
+
+  // used to hold the what() string if the event handler threw
+  std::string exception_msg_;
+
+
+///////////////////////////////////////////////
+//
+//   Testing support
+//
+// The consumer can stimulate errors
+// within libhdfspp by returning a Status from the callback.
+///////////////////////////////////////////////
+public:
+  static event_response test_err(const Status &status) {
+    return event_response(status);
+  }
+
+  Status status() { return status_; }
+
+private:
+  event_response(const Status & status) :
+    response_type_(event_response_type::kTest_Error), status_(status) {}
+
+  Status status_; // To be used with kTest_Error
+};
+
+/* callback signature */
+typedef std::function<event_response (const char * event,
+                                      const char * cluster,
+                                      int64_t value)> fs_event_callback;
+
+typedef std::function<event_response (const char * event,
+                                      const char * cluster,
+                                      const char * file,
+                                      int64_t value)>file_event_callback;
+}
+#endif
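A hedged sketch of a consumer-side callback matching the fs_event_callback signature above. The logging body is illustrative, and the try/catch merely demonstrates the make_caught_* factories; it is an assumption here that the consumer, rather than the library, does the catching:

    #include "hdfspp/events.h"
    #include <exception>
    #include <iostream>

    hdfs::fs_event_callback make_logging_callback() {
      return [](const char *event, const char *cluster, int64_t value) -> hdfs::event_response {
        try {
          // Consumer code goes here; failures are reported back through event_response.
          std::cout << "event=" << event << " cluster=" << cluster << " value=" << value << "\n";
          return hdfs::event_response::make_ok();
        } catch (const std::exception &e) {
          return hdfs::event_response::make_caught_std_exception(e.what());
        } catch (...) {
          return hdfs::event_response::make_caught_unknown_exception();
        }
      };
    }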

+ 48 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/fsinfo.h

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HDFSPP_FSINFO_H_
+#define HDFSPP_FSINFO_H_
+
+#include <string>
+
+namespace hdfs {
+
+/**
+ * Information that is assumed to be unchanging about a file system for the duration of
+ * the operations.
+ */
+struct FsInfo {
+
+  uint64_t capacity;
+  uint64_t used;
+  uint64_t remaining;
+  uint64_t under_replicated;
+  uint64_t corrupt_blocks;
+  uint64_t missing_blocks;
+  uint64_t missing_repl_one_blocks;
+  uint64_t blocks_in_future;
+
+  FsInfo();
+
+  //Converts FsInfo object to std::string (hdfs_df format)
+  std::string str(const std::string fs_name) const;
+};
+
+}
+
+#endif
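A matching sketch for FsInfo via FileSystem::GetFsStats, declared later in this changeset; fs is again assumed to be a connected std::shared_ptr<hdfs::FileSystem>, and the name string is only used for display:

    hdfs::FsInfo info;
    hdfs::Status status = fs->GetFsStats(info);
    if (status.ok()) {
      // hdfs_df-style report.
      std::cout << info.str("hdfs://namenode.example.com:8020") << std::endl;
    }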

+ 394 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfs_ext.h

@@ -0,0 +1,394 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_HDFS_HDFSEXT
+#define LIBHDFSPP_HDFS_HDFSEXT
+
+#include <hdfspp/log.h>
+
+/* get typedefs and #defines from libhdfs' hdfs.h to stay consistent */
+#include <hdfs/hdfs.h>
+
+/**
+ *  Note: The #defines below are copied directly from libhdfs'
+ *  hdfs.h.  LIBHDFS_EXTERNAL gets explicitly #undefed at the
+ *  end of the file so it must be redefined here.
+ **/
+
+#ifdef WIN32
+    #ifdef LIBHDFS_DLL_EXPORT
+        #define LIBHDFS_EXTERNAL __declspec(dllexport)
+    #elif LIBHDFS_DLL_IMPORT
+        #define LIBHDFS_EXTERNAL __declspec(dllimport)
+    #else
+        #define LIBHDFS_EXTERNAL
+    #endif
+#else
+    #ifdef LIBHDFS_DLL_EXPORT
+        #define LIBHDFS_EXTERNAL __attribute__((visibility("default")))
+    #elif LIBHDFS_DLL_IMPORT
+        #define LIBHDFS_EXTERNAL __attribute__((visibility("default")))
+    #else
+        #define LIBHDFS_EXTERNAL
+    #endif
+#endif
+
+
+/**
+ * Keep C bindings that are libhdfs++ specific in here.
+ **/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ *  Reads the last error, if any, that happened in this thread
+ *  into the user supplied buffer.
+ *  @param buf  A chunk of memory with room for the error string.
+ *  @param len  Size of the buffer; if the message is longer than
+ *              len, then len-1 bytes of the message will be copied.
+ *  @return     0 on successful read of the last error, -1 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsGetLastError(char *buf, int len);
+
+
+/**
+ *  Cancels operations being made by the FileHandle.
+ *  Note: Cancel cannot be reversed.  This is intended
+ *  to be used before hdfsClose to avoid waiting for
+ *  operations to complete.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsCancel(hdfsFS fs, hdfsFile file);
+
+/**
+ * Create an HDFS builder, using the configuration XML files from the indicated
+ * directory.  If the directory does not exist, or contains no configuration
+ * XML files, a Builder using all default values will be returned.
+ *
+ * @return The HDFS builder, or NULL on error.
+ */
+struct hdfsBuilder *hdfsNewBuilderFromDirectory(const char * configDirectory);
+
+
+/**
+ * Get a configuration string from the settings currently read into the builder.
+ *
+ * @param key      The key to find
+ * @param val      (out param) The value.  This will be set to NULL if the
+ *                 key isn't found.  You must free this string with
+ *                 hdfsConfStrFree.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 Failure to find the key is not an error.
+ */
+LIBHDFS_EXTERNAL
+int hdfsBuilderConfGetStr(struct hdfsBuilder *bld, const char *key,
+                          char **val);
+
+/**
+ * Get a configuration integer from the settings currently read into the builder.
+ *
+ * @param key      The key to find
+ * @param val      (out param) The value.  This will NOT be changed if the
+ *                 key isn't found.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 Failure to find the key is not an error.
+ */
+LIBHDFS_EXTERNAL
+int hdfsBuilderConfGetInt(struct hdfsBuilder *bld, const char *key, int32_t *val);
+
+
+/**
+ * Get a configuration long from the settings currently read into the builder.
+ *
+ * @param key      The key to find
+ * @param val      (out param) The value.  This will NOT be changed if the
+ *                 key isn't found.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 Failure to find the key is not an error.
+ */
+LIBHDFS_EXTERNAL
+int hdfsBuilderConfGetLong(struct hdfsBuilder *bld, const char *key, int64_t *val);
+
+struct hdfsDNInfo {
+  const char *    ip_address;
+  const char *    hostname;
+  const char *    network_location;
+  int             xfer_port;
+  int             info_port;
+  int             IPC_port;
+  int             info_secure_port;
+};
+
+struct hdfsBlockInfo {
+    uint64_t            start_offset;
+    uint64_t            num_bytes;
+
+    size_t              num_locations;
+    struct hdfsDNInfo * locations;
+};
+
+struct hdfsBlockLocations
+{
+    uint64_t               fileLength;
+    int                    isLastBlockComplete;
+    int                    isUnderConstruction;
+
+    size_t                 num_blocks;
+    struct hdfsBlockInfo * blocks;
+};
+
+/**
+ * Returns the block information and data nodes associated with a particular file.
+ *
+ * The hdfsBlockLocations structure will have zero or more hdfsBlockInfo elements,
+ * which will have zero or more ip_addr elements indicating which datanodes have
+ * each block.
+ *
+ * @param fs         A connected hdfs instance
+ * @param path       Path of the file to query
+ * @param locations  The address of an output pointer to contain the block information.
+ *                   On success, this pointer must be later freed with hdfsFreeBlockLocations.
+ *
+ * @return         0 on success; -1 otherwise.
+ *                 If the file does not exist, -1 will be returned and errno will be set.
+ */
+LIBHDFS_EXTERNAL
+int hdfsGetBlockLocations(hdfsFS fs, const char *path, struct hdfsBlockLocations ** locations);
+
+/**
+ * Frees up an hdfsBlockLocations pointer allocated by hdfsGetBlockLocations.
+ *
+ * @param locations    The previously-populated pointer allocated by hdfsGetBlockLocations
+ * @return             0 on success, -1 on error
+ */
+LIBHDFS_EXTERNAL
+int hdfsFreeBlockLocations(struct hdfsBlockLocations * locations);
+
+
+
+
+/**
+ *  Client can supply a C style function pointer to be invoked any time something
+ *  is logged.  Unlike the C++ logger this will not filter by level or component,
+ *  it is up to the consumer to throw away messages they don't want.
+ *
+ *  Note: The callback provided must be reentrant, the library does not guarantee
+ *  that there won't be concurrent calls.
+ *  Note: Callback does not own the LogData struct.  If the client would like to
+ *  keep one around use hdfsCopyLogData/hdfsFreeLogData.
+ **/
+LIBHDFS_EXTERNAL
+void hdfsSetLogFunction(void (*hook)(LogData*));
+
+/**
+ *  Create a copy of the LogData object passed in and return a pointer to it.
+ *  Returns NULL if it was unable to copy.
+ **/
+LIBHDFS_EXTERNAL
+LogData *hdfsCopyLogData(const LogData*);
+
+/**
+ *  Client must call this to dispose of the LogData created by hdfsCopyLogData.
+ **/
+LIBHDFS_EXTERNAL
+void hdfsFreeLogData(LogData*);
+
+/**
+ * Enable logging functionality for a component.
+ * Return -1 on failure, 0 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsEnableLoggingForComponent(int component);
+
+/**
+ * Disable logging functionality for a component.
+ * Return -1 on failure, 0 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsDisableLoggingForComponent(int component);
+
+/**
+ * Set level between trace and error.
+ * Return -1 on failure, 0 otherwise.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsSetLoggingLevel(int level);
+
+/*
+ * Supported event names.  These names will stay consistent in libhdfs callbacks.
+ *
+ * Other events not listed here may be seen, but they are not stable and
+ * should not be counted on.
+ */
+extern const char * FS_NN_CONNECT_EVENT;
+extern const char * FS_NN_READ_EVENT;
+extern const char * FS_NN_WRITE_EVENT;
+
+extern const char * FILE_DN_CONNECT_EVENT;
+extern const char * FILE_DN_READ_EVENT;
+extern const char * FILE_DN_WRITE_EVENT;
+
+
+#define LIBHDFSPP_EVENT_OK (0)
+#define DEBUG_SIMULATE_ERROR (-1)
+
+typedef int (*libhdfspp_fs_event_callback)(const char * event, const char * cluster,
+                                           int64_t value, int64_t cookie);
+typedef int (*libhdfspp_file_event_callback)(const char * event,
+                                             const char * cluster,
+                                             const char * file,
+                                             int64_t value, int64_t cookie);
+
+/**
+ * Registers a callback for the next filesystem connect operation the current
+ * thread executes.
+ *
+ *  @param handler A function pointer.  Taken as a void* and internally
+ *                 cast into the appropriate type.
+ *  @param cookie  An opaque value that will be passed into the handler; can
+ *                 be used to correlate the handler with some object in the
+ *                 consumer's space.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsPreAttachFSMonitor(libhdfspp_fs_event_callback handler, int64_t cookie);
+
+
+/**
+ * Registers a callback for the next file open operation the current thread
+ * executes.
+ *
+ *  @param fs      The filesystem
+ *  @param handler A function pointer.  Taken as a void* and internally
+ *                 cast into the appropriate type.
+ *  @param cookie  An opaque value that will be passed into the handler; can
+ *                 be used to correlate the handler with some object in the
+ *                 consumer's space.
+ **/
+LIBHDFS_EXTERNAL
+int hdfsPreAttachFileMonitor(libhdfspp_file_event_callback handler, int64_t cookie);
+
+
+/**
+ * Finds file name on the file system. hdfsFreeFileInfo should be called to deallocate memory.
+ *
+ *  @param fs         The filesystem (required)
+ *  @param path       Path at which to begin search, can have wild cards  (must be non-blank)
+ *  @param name       Name to find, can have wild cards                   (must be non-blank)
+ *  @param numEntries Set to the number of files/directories in the result.
+ *  @return           Returns a dynamically-allocated array of hdfsFileInfo
+ *                    objects; NULL on error or empty result.
+ *                    errno is set to non-zero on error or zero on success.
+ **/
+LIBHDFS_EXTERNAL
+hdfsFileInfo * hdfsFind(hdfsFS fs, const char* path, const char* name, uint32_t * numEntries);
+
+
+/*****************************************************************************
+ *                    HDFS SNAPSHOT FUNCTIONS
+ ****************************************************************************/
+
+/**
+ * Creates a snapshot of a snapshottable directory specified by path
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the directory to be snapshotted (must be non-blank)
+ *  @param name    Name to be given to the created snapshot (may be NULL)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsCreateSnapshot(hdfsFS fs, const char* path, const char* name);
+
+/**
+ * Deletes the directory snapshot specified by path and name
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the snapshotted directory (must be non-blank)
+ *  @param name    Name of the snapshot to be deleted (must be non-blank)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsDeleteSnapshot(hdfsFS fs, const char* path, const char* name);
+
+/**
+ * Renames the directory snapshot specified by path from old_name to new_name
+ *
+ *  @param fs         The filesystem (required)
+ *  @param path       Path to the snapshotted directory (must be non-blank)
+ *  @param old_name   Current name of the snapshot (must be non-blank)
+ *  @param new_name   New name of the snapshot (must be non-blank)
+ *  @return           0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsRenameSnapshot(hdfsFS fs, const char* path, const char* old_name, const char* new_name);
+
+/**
+ * Allows snapshots to be made on the specified directory
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the directory to be made snapshottable (must be non-blank)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsAllowSnapshot(hdfsFS fs, const char* path);
+
+/**
+ * Disallows snapshots to be made on the specified directory
+ *
+ *  @param fs      The filesystem (required)
+ *  @param path    Path to the directory to be made non-snapshottable (must be non-blank)
+ *  @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsDisallowSnapshot(hdfsFS fs, const char* path);
+
+/**
+ * Create a FileSystem based on the builder but don't connect
+ * @param bld     Used to populate config options in the same manner as hdfsBuilderConnect.
+ *                Does not free builder.
+ **/
+LIBHDFS_EXTERNAL
+hdfsFS hdfsAllocateFileSystem(struct hdfsBuilder *bld);
+
+/**
+ * Connect a FileSystem created with hdfsAllocateFileSystem
+ * @param fs      A disconnected FS created with hdfsAllocateFileSystem
+ * @param bld     The same builder used for hdfsAllocateFileSystem, or an exact copy of it; a few fields are still needed.
+ *                Does not free builder.
+ * @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsConnectAllocated(hdfsFS fs, struct hdfsBuilder *bld);
+
+/**
+ * Cancel a pending connection on a FileSystem
+ * @param fs      A fs in the process of connecting using hdfsConnectAllocated in another thread.
+ * @return        0 on success, corresponding errno on failure
+ **/
+LIBHDFS_EXTERNAL
+int hdfsCancelPendingConnection(hdfsFS fs);
+
+
+#ifdef __cplusplus
+} /* end extern "C" */
+#endif
+
+#endif
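To tie a few of the declarations above together, a hedged sketch of querying block locations through this C shim (compiled as C++ to match the other examples; fs is assumed to come from the ordinary libhdfs connect path, e.g. hdfsBuilderConnect in hdfs/hdfs.h, which is not part of this header):

    #include <hdfs/hdfs.h>
    #include <hdfspp/hdfs_ext.h>
    #include <cstdio>

    // Sketch only: fs is assumed to be an already connected hdfsFS.
    static void print_block_locations(hdfsFS fs, const char *path) {
      struct hdfsBlockLocations *locations = nullptr;
      if (hdfsGetBlockLocations(fs, path, &locations) != 0) {
        char err[4096];
        if (hdfsGetLastError(err, (int)sizeof(err)) == 0) {
          fprintf(stderr, "hdfsGetBlockLocations failed: %s\n", err);
        }
        return;
      }
      for (size_t i = 0; i < locations->num_blocks; i++) {
        const struct hdfsBlockInfo *block = &locations->blocks[i];
        printf("block %zu: offset=%llu bytes=%llu replicas=%zu\n", i,
               (unsigned long long)block->start_offset,
               (unsigned long long)block->num_bytes, block->num_locations);
      }
      hdfsFreeBlockLocations(locations);
    }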

+ 492 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/hdfspp.h

@@ -0,0 +1,492 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_HDFSPP_H_
+#define LIBHDFSPP_HDFSPP_H_
+
+#include "hdfspp/options.h"
+#include "hdfspp/status.h"
+#include "hdfspp/events.h"
+#include "hdfspp/block_location.h"
+#include "hdfspp/statinfo.h"
+#include "hdfspp/fsinfo.h"
+#include "hdfspp/content_summary.h"
+#include "hdfspp/uri.h"
+#include "hdfspp/config_parser.h"
+#include "hdfspp/locks.h"
+
+#include <functional>
+#include <memory>
+#include <set>
+#include <iostream>
+
+namespace hdfs {
+
+/**
+ * An IoService manages a queue of asynchronous tasks. All libhdfs++
+ * operations are filed against a particular IoService.
+ *
+ * When an operation is queued into an IoService, the IoService will
+ * run the callback handler associated with the operation. Note that
+ * the IoService must be stopped before destructing the objects that
+ * post the operations.
+ *
+ * From an implementation point of view the hdfs::IoService provides
+ * a thin wrapper over an asio::io_service object so that additional
+ * instrumentation and functionality can be added.
+ **/
+
+class IoService : public std::enable_shared_from_this<IoService>
+{
+ public:
+  static IoService *New();
+  static std::shared_ptr<IoService> MakeShared();
+  virtual ~IoService();
+
+  /**
+   * Start up as many threads as there are logical processors.
+   * Return number of threads created.
+   **/
+  virtual unsigned int InitDefaultWorkers() = 0;
+
+  /**
+   * Initialize with thread_count handler threads.
+   * If thread count is less than one print a log message and default to one thread.
+   * Return number of threads created.
+   **/
+  virtual unsigned int InitWorkers(unsigned int thread_count) = 0;
+
+  /**
+   * Place an item on the execution queue.  Will be invoked from outside of the calling context.
+   **/
+  virtual void PostTask(std::function<void(void)>& asyncTask) = 0;
+
+  /**
+   * Run the asynchronous tasks associated with this IoService.
+   **/
+  virtual void Run() = 0;
+  /**
+   * Stop running asynchronous tasks associated with this IoService.
+   * All worker threads will return as soon as they finish executing their current task.
+   **/
+  virtual void Stop() = 0;
+};
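A minimal start/stop sketch of the lifecycle this interface implies, mirroring how the connect_cancel example earlier in this changeset drives it:

    // Sketch only: error handling reduced to comments.
    std::shared_ptr<hdfs::IoService> io_service = hdfs::IoService::MakeShared();
    unsigned int workers = io_service->InitDefaultWorkers();
    if (workers < 1) {
      // Could not start any worker threads; give up here.
    }
    // ... hand io_service to FileSystem::New and perform operations ...
    io_service->Stop();  // stop before destroying the objects that post tasks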
+
+/**
+ * A node exclusion rule provides a simple way of testing if the
+ * client should attempt to connect to a node based on the node's
+ * UUID.  The FileSystem and FileHandle use the BadDataNodeTracker
+ * by default.  AsyncPreadSome takes an optional NodeExclusionRule
+ * that will override the BadDataNodeTracker.
+ **/
+class NodeExclusionRule {
+ public:
+  virtual ~NodeExclusionRule();
+  virtual bool IsBadNode(const std::string &node_uuid) = 0;
+};
+
+/**
+ * Applications open a FileHandle to read files in HDFS.
+ **/
+class FileHandle {
+public:
+  /**
+   * Read data from a specific position. The current implementation
+   * stops at the block boundary.
+   *
+   * @param buf the pointer to the buffer
+   * @param buf_size the size of the buffer
+   * @param offset the offset in the file
+   *
+   * The handler returns the datanode that serves the block and the number of
+   * bytes read. Status::InvalidOffset is returned when trying to begin
+   * a read past the EOF.
+   **/
+  virtual void
+  PositionRead(void *buf, size_t buf_size, uint64_t offset,
+               const std::function<void(const Status &, size_t)> &handler) = 0;
+  virtual Status PositionRead(void *buf, size_t buf_size, off_t offset, size_t *bytes_read) = 0;
+  virtual Status Read(void *buf, size_t buf_size, size_t *bytes_read) = 0;
+  virtual Status Seek(off_t *offset, std::ios_base::seekdir whence) = 0;
+
+  /**
+   * Cancel outstanding file operations.  This is not reversible; once called
+   * the handle should be disposed of.
+   **/
+  virtual void CancelOperations(void) = 0;
+
+  /**
+   * Determine if a datanode should be excluded from future operations
+   * based on the return Status.
+   *
+   * @param status the Status object returned by FileHandle::PositionRead
+   * @return true if the status indicates a failure that is not recoverable
+   * by the client and false otherwise.
+   **/
+  static bool ShouldExclude(const Status &status);
+
+
+  /**
+   * Sets an event callback for file-level event notifications (such as connecting
+   * to the DataNode, communications errors, etc.)
+   *
+   * Many events are defined in hdfspp/events.h; the consumer should also expect
+   * to be called with many private events, which can be ignored.
+   *
+   * @param callback The function to call when a reporting event occurs.
+   */
+  virtual void SetFileEventCallback(file_event_callback callback) = 0;
+
+  /* how many bytes have been successfully read */
+  virtual uint64_t get_bytes_read() = 0;
+
+  /* resets the number of bytes read to zero */
+  virtual void clear_bytes_read() = 0;
+
+  virtual ~FileHandle();
+};
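A hedged sketch of the synchronous PositionRead contract described above; file is assumed to be a FileHandle obtained from FileSystem::Open, and the buffer size is arbitrary:

    // Sketch only: file is assumed to be a std::unique_ptr<hdfs::FileHandle>.
    char buf[4096];
    size_t bytes_read = 0;
    off_t offset = 0;
    hdfs::Status status = file->PositionRead(buf, sizeof(buf), offset, &bytes_read);
    if (status.ok()) {
      // The read may stop early at a block boundary, so bytes_read can be < sizeof(buf).
      offset += bytes_read;
    } else if (status.is_invalid_offset()) {
      // Attempted to start a read at or past EOF.
    }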
+
+/**
+ * FileSystem implements APIs to interact with HDFS.
+ **/
+class FileSystem {
+ public:
+  //Returns the default maximum depth for recursive Find tool
+  static uint32_t GetDefaultFindMaxDepth();
+
+  //Returns the default permission mask
+  static uint16_t GetDefaultPermissionMask();
+
+  //Checks if the given permission mask is valid
+  static Status CheckValidPermissionMask(uint16_t permissions);
+
+  //Checks if replication value is valid
+  static Status CheckValidReplication(uint16_t replication);
+
+  /**
+   * Create a new instance of the FileSystem object. The call
+   * initializes the RPC connections to the NameNode and returns an
+   * FileSystem object.
+   *
+   * Note: The FileSystem takes ownership of the IoService passed in the
+   * constructor.  The FileSystem destructor will call delete on it.
+   *
+   * If user_name is blank, the current user will be used for a default.
+   **/
+  static FileSystem *New(
+      IoService *&io_service, const std::string &user_name, const Options &options);
+
+  /**
+   * Works the same as the other FileSystem::New but takes a copy of an existing IoService.
+   * The shared IoService is expected to already have worker threads initialized.
+   **/
+  static FileSystem *New(
+      std::shared_ptr<IoService>, const std::string &user_name, const Options &options);
+
+  /**
+   * Returns a new instance with the default user and options, using the default IoService.
+   **/
+  static FileSystem *New();
+
+  /**
+   *  Callback type for async FileSystem::Connect calls.
+   *    Provides the result status and instance pointer to the handler.
+   **/
+  typedef std::function<void(const Status& result_status, FileSystem *created_fs)> AsyncConnectCallback;
+
+  /**
+   *  Connect directly to the specified namenode using the host and port (service).
+   **/
+  virtual void Connect(const std::string &server, const std::string &service,
+      const AsyncConnectCallback &handler) = 0;
+
+  /* Synchronous call of Connect */
+  virtual Status Connect(const std::string &server, const std::string &service) = 0;
+
+
+  /**
+   * Connects to the hdfs instance indicated by the defaultFs value of the
+   * Options structure.
+   *
+   * If no defaultFs is defined, returns an error.
+   */
+  virtual void ConnectToDefaultFs(
+      const AsyncConnectCallback& handler) = 0;
+  virtual Status ConnectToDefaultFs() = 0;
+
+  /**
+   * Cancels any attempts to connect to the HDFS cluster.
+   * FileSystem is expected to be destroyed after invoking this.
+   */
+  virtual bool CancelPendingConnect() = 0;
+
+  /**
+   * Open a file on HDFS. The call issues an RPC to the NameNode to
+   * gather the locations of all blocks in the file and to return a
+   * new instance of the @ref InputStream object.
+   **/
+  virtual void
+  Open(const std::string &path,
+       const std::function<void(const Status &, FileHandle *)> &handler) = 0;
+  virtual Status Open(const std::string &path, FileHandle **handle) = 0;
+
+  /**
+   * Get the block size for the given file.
+   * @param path The path to the file
+   */
+  virtual void GetPreferredBlockSize(const std::string &path,
+      const std::function<void(const Status &, const uint64_t &)> &handler) = 0;
+  virtual Status GetPreferredBlockSize(const std::string &path, uint64_t & block_size) = 0;
+
+  /**
+   * Set replication for an existing file.
+   * <p>
+   * The NameNode sets replication to the new value and returns.
+   * The actual block replication is not expected to be performed during
+   * this method call. The blocks will be populated or removed in the
+   * background as the result of the routine block maintenance procedures.
+   *
+   * @param path file name
+   * @param replication new replication
+   */
+  virtual void SetReplication(const std::string & path, int16_t replication, std::function<void(const Status &)> handler) = 0;
+  virtual Status SetReplication(const std::string & path, int16_t replication) = 0;
+
+  /**
+   * Sets the modification and access time of the file to the specified time.
+   * @param path The string representation of the path
+   * @param mtime The number of milliseconds since Jan 1, 1970.
+   *              Setting mtime to -1 means that modification time should not
+   *              be set by this call.
+   * @param atime The number of milliseconds since Jan 1, 1970.
+   *              Setting atime to -1 means that access time should not be set
+   *              by this call.
+   */
+  virtual void SetTimes(const std::string & path, uint64_t mtime, uint64_t atime, std::function<void(const Status &)> handler) = 0;
+  virtual Status SetTimes(const std::string & path, uint64_t mtime, uint64_t atime) = 0;
+
+  /**
+   * Returns metadata about the file if the file/directory exists.
+   **/
+  virtual void
+  GetFileInfo(const std::string &path,
+                  const std::function<void(const Status &, const StatInfo &)> &handler) = 0;
+  virtual Status GetFileInfo(const std::string &path, StatInfo & stat_info) = 0;
+
+  /**
+   * Returns the number of directories, files and bytes under the given path
+   **/
+  virtual void
+  GetContentSummary(const std::string &path,
+                  const std::function<void(const Status &, const ContentSummary &)> &handler) = 0;
+  virtual Status GetContentSummary(const std::string &path, ContentSummary & stat_info) = 0;
+
+  /**
+   * Retrieves the file system information as a whole, such as the total raw size of all files in the filesystem
+   * and the raw capacity of the filesystem
+   *
+   *  FsInfo struct is populated by GetFsStats
+   **/
+  virtual void GetFsStats(
+      const std::function<void(const Status &, const FsInfo &)> &handler) = 0;
+  virtual Status GetFsStats(FsInfo & fs_info) = 0;
+
+  /**
+   * Retrieves the files contained in a directory and returns the metadata
+   * for each of them.
+   *
+   * The asynchronous method will return batches of files; the consumer must
+   * return true if they want more files to be delivered.  The final bool
+   * parameter in the callback will be set to false if this is the final
+   * batch of files.
+   *
+   * The synchronous method will return all files in the directory.
+   *
+   * Path must be an absolute path in the hdfs filesystem (e.g. /tmp/foo/bar)
+   **/
+  virtual void
+  GetListing(const std::string &path,
+                  const std::function<bool(const Status &, const std::vector<StatInfo> &, bool)> &handler) = 0;
+  virtual Status GetListing(const std::string &path, std::vector<StatInfo> * stat_infos) = 0;
+
+  /**
+   * Returns the locations of all known blocks for the indicated file (or part of it), or an error
+   * if the information could not be found
+   */
+  virtual void GetBlockLocations(const std::string & path, uint64_t offset, uint64_t length,
+    const std::function<void(const Status &, std::shared_ptr<FileBlockLocation> locations)> ) = 0;
+  virtual Status GetBlockLocations(const std::string & path, uint64_t offset, uint64_t length,
+    std::shared_ptr<FileBlockLocation> * locations) = 0;
+
+  /**
+   * Creates a new directory
+   *
+   *  @param path           Path to the directory to be created (must be non-empty)
+   *  @param permissions    Permissions for the new directory   (negative value for the default permissions)
+   *  @param createparent   Create parent directories if they do not exist
+   */
+  virtual void Mkdirs(const std::string & path, uint16_t permissions, bool createparent,
+      std::function<void(const Status &)> handler) = 0;
+  virtual Status Mkdirs(const std::string & path, uint16_t permissions, bool createparent) = 0;
+
+  /**
+   *  Delete the given file or directory from the file system.
+   *  <p>
+   *  Same as delete, but provides a way to avoid accidentally
+   *  deleting non-empty directories programmatically.
+   *  @param path existing name (must be non-empty)
+   *  @param recursive if true, deletes a non-empty directory recursively
+   */
+  virtual void Delete(const std::string &path, bool recursive,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status Delete(const std::string &path, bool recursive) = 0;
+
+  /**
+   *  Rename - Rename file.
+   *  @param oldPath The path of the source file.       (must be non-empty)
+   *  @param newPath The path of the destination file.  (must be non-empty)
+   */
+  virtual void Rename(const std::string &oldPath, const std::string &newPath,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status Rename(const std::string &oldPath, const std::string &newPath) = 0;
+
+  /**
+   * Set permissions for an existing file/directory.
+   *
+   * @param path          the path to the file or directory
+   * @param permissions   the bitmask to set it to (should be between 0 and 01777)
+   */
+  virtual void SetPermission(const std::string & path, uint16_t permissions,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status SetPermission(const std::string & path, uint16_t permissions) = 0;
+
+  /**
+   * Set Owner of a path (i.e. a file or a directory).
+   * The parameters username and groupname can be empty.
+   * @param path      file path
+   * @param username  If it is empty, the original username remains unchanged.
+   * @param groupname If it is empty, the original groupname remains unchanged.
+   */
+  virtual void SetOwner(const std::string & path, const std::string & username,
+      const std::string & groupname, const std::function<void(const Status &)> &handler) = 0;
+  virtual Status SetOwner(const std::string & path,
+      const std::string & username, const std::string & groupname) = 0;
+
+  /**
+   * Finds all files matching the specified name recursively starting from the
+   * specified directory. Returns metadata for each of them.
+   *
+   * Example: Find("/dir?/tree*", "some?file*name")
+   *
+   * @param path       Absolute path at which to begin search, can have wild cards (must be non-blank)
+   * @param name       Name to find, can also have wild cards                      (must be non-blank)
+   *
+   * The asynchronous method will return batches of files; the consumer must
+   * return true if they want more files to be delivered.  The final bool
+   * parameter in the callback will be set to false if this is the final
+   * batch of files.
+   *
+   * The synchronous method will return matching files.
+   **/
+  virtual void
+  Find(const std::string &path, const std::string &name, const uint32_t maxdepth,
+                  const std::function<bool(const Status &, const std::vector<StatInfo> & , bool)> &handler) = 0;
+  virtual Status Find(const std::string &path, const std::string &name,
+                  const uint32_t maxdepth, std::vector<StatInfo> * stat_infos) = 0;
+
+
+  /*****************************************************************************
+   *                    FILE SYSTEM SNAPSHOT FUNCTIONS
+   ****************************************************************************/
+
+  /**
+   * Creates a snapshot of a snapshottable directory specified by path
+   *
+   *  @param path    Path to the directory to be snapshotted (must be non-empty)
+   *  @param name    Name to be given to the created snapshot (may be empty)
+   **/
+  virtual void CreateSnapshot(const std::string &path, const std::string &name,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status CreateSnapshot(const std::string &path,
+      const std::string &name) = 0;
+
+  /**
+   * Deletes the directory snapshot specified by path and name
+   *
+   *  @param path    Path to the snapshotted directory (must be non-empty)
+   *  @param name    Name of the snapshot to be deleted (must be non-empty)
+   **/
+  virtual void DeleteSnapshot(const std::string &path, const std::string &name,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status DeleteSnapshot(const std::string &path,
+      const std::string &name) = 0;
+
+  /**
+   * Renames the directory snapshot specified by path from old_name to new_name
+   *
+   *  @param path       Path to the snapshotted directory (must be non-blank)
+   *  @param old_name   Current name of the snapshot (must be non-blank)
+   *  @param new_name   New name of the snapshot (must be non-blank)
+   **/
+  virtual void RenameSnapshot(const std::string &path, const std::string &old_name,
+      const std::string &new_name, const std::function<void(const Status &)> &handler) = 0;
+  virtual Status RenameSnapshot(const std::string &path, const std::string &old_name,
+      const std::string &new_name) = 0;
+
+  /**
+   * Allows snapshots to be made on the specified directory
+   *
+   *  @param path    Path to the directory to be made snapshottable (must be non-empty)
+   **/
+  virtual void AllowSnapshot(const std::string &path,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status AllowSnapshot(const std::string &path) = 0;
+
+  /**
+   * Disallows snapshots to be made on the specified directory
+   *
+   *  @param path    Path to the directory to be made non-snapshottable (must be non-empty)
+   **/
+  virtual void DisallowSnapshot(const std::string &path,
+      const std::function<void(const Status &)> &handler) = 0;
+  virtual Status DisallowSnapshot(const std::string &path) = 0;
+
+  /**
+   * Note that it is an error to destroy the filesystem from within a filesystem
+   * callback.  It will lead to a deadlock and the termination of the process.
+   */
+  virtual ~FileSystem();
+
+
+  /**
+   * Sets an event callback for fs-level event notifications (such as connecting
+   * to the NameNode, communications errors with the NN, etc.)
+   *
+   * Many events are defined in hdfspp/events.h; the consumer should also expect
+   * to be called with many private events, which can be ignored.
+   *
+   * @param callback The function to call when a reporting event occurs.
+   */
+  virtual void SetFsEventCallback(fs_event_callback callback) = 0;
+
+  virtual Options get_options() = 0;
+
+  virtual std::string get_cluster_name() = 0;
+};
+}
+
+#endif
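
A minimal synchronous usage sketch of the FileSystem interface above. The namenode host, port, and path are placeholders, and IoService is assumed to be declared in hdfspp/hdfspp.h, as the C bindings later in this change use it:

    #include "hdfspp/hdfspp.h"

    #include <iostream>

    int main() {
      hdfs::Options options;
      hdfs::IoService *io_service = hdfs::IoService::New();
      // Per the comment above, FileSystem takes ownership of io_service.
      hdfs::FileSystem *fs = hdfs::FileSystem::New(io_service, "", options);

      hdfs::Status status = fs->Connect("nn.example.com", "8020");
      if (!status.ok()) {
        std::cerr << "Connect failed: " << status.ToString() << std::endl;
        delete fs;
        return 1;
      }

      hdfs::StatInfo info;
      status = fs->GetFileInfo("/tmp/foo/bar", info);
      if (status.ok())
        std::cout << info.str() << std::endl;

      delete fs;  // never destroy the FileSystem from inside one of its callbacks
      return 0;
    }

The batched asynchronous GetListing described above can be driven as in the sketch below; the promise/future pair used to wait for completion is an illustration choice, not part of the API:

    #include "hdfspp/hdfspp.h"

    #include <future>
    #include <iostream>
    #include <vector>

    void ListAsync(hdfs::FileSystem *fs, const std::string &path) {
      std::promise<hdfs::Status> done;
      std::future<hdfs::Status> done_future = done.get_future();

      fs->GetListing(path,
          [&done](const hdfs::Status &status,
                  const std::vector<hdfs::StatInfo> &batch,
                  bool has_more) -> bool {
            if (!status.ok()) {
              done.set_value(status);
              return false;                  // stop; an error occurred
            }
            for (const hdfs::StatInfo &entry : batch)
              std::cout << entry.str() << std::endl;
            if (!has_more)                   // false means this was the final batch
              done.set_value(hdfs::Status::OK());
            return true;                     // ask for the next batch, if any
          });

      done_future.get();                     // block until the listing completes
    }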

+ 110 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/locks.h

@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_LOCKS_H_
+#define COMMON_HDFS_LOCKS_H_
+
+#include <stdexcept>
+#include <string>
+#include <atomic>
+#include <mutex>
+#include <memory>
+
+namespace hdfs
+{
+
+//
+//  Thrown by LockGuard to indicate that it was unable to acquire a mutex
+//  what_str should contain info about what caused the failure
+//
+class LockFailure : public std::runtime_error {
+ public:
+  LockFailure(const char *what_str) : std::runtime_error(what_str) {};
+  LockFailure(const std::string& what_str) : std::runtime_error(what_str) {};
+};
+
+//
+//  A pluggable mutex type to allow client code to share mutexes it may
+//  already use to protect certain system resources.  Certain shared
+//  libraries have some procedures that aren't always implemented in a thread
+//  safe manner. If libhdfs++ and the code linking it depend on the same
+//  library this provides a mechanism to coordinate safe access.
+//
+//  The interface provided is intended to be similar to std::mutex.  If the
+//  lock can't be acquired, the lock method may throw LockFailure. If locking
+//  does fail, libhdfs++ is expected to fail as cleanly as possible, e.g.
+//  FileSystem::Mkdirs might return a MutexError but a subsequent call may be
+//  successful.
+//
+class Mutex {
+ public:
+  virtual ~Mutex() {};
+  virtual void lock() = 0;
+  virtual void unlock() = 0;
+  virtual std::string str() = 0;
+};
+
+//
+//  LockGuard works in a similar manner to std::lock_guard: it locks the mutex
+//  in the constructor and unlocks it in the destructor.
+//  Failure to acquire the mutex in the constructor will result in throwing a
+//  LockFailure exception.
+//
+class LockGuard {
+ public:
+  LockGuard(Mutex *m);
+  ~LockGuard();
+ private:
+  Mutex *_mtx;
+};
+
+//
+//  Manage instances of hdfs::Mutex that are intended to be global to the
+//  process.
+//
+//  LockManager's InitLocks method provides a mechanism for the calling
+//  application to share its own implementations of hdfs::Mutex.  It must be
+//  called prior to instantiating any FileSystem objects and can only be
+//  called once.  If a lock is not provided, a default mutex type wrapping
+//  std::mutex is used.
+//
+
+class LockManager {
+ public:
+  // Initializes with a default set of C++11 style mutexes
+  static bool InitLocks(Mutex *gssapi);
+  static Mutex *getGssapiMutex();
+
+  // Tests only, implementation may no-op on release builds.
+  // Reset _finalized to false and set all Mutex* members to default values.
+  static void TEST_reset_manager();
+  static Mutex *TEST_get_default_mutex();
+ private:
+  // Used only in tests.
+  static Mutex *TEST_default_mutex;
+  // Use to synchronize calls into GSSAPI/Kerberos libs
+  static Mutex *gssapiMtx;
+
+  // Prevent InitLocks from being called more than once
+  // Allows all locks to be set a single time atomically
+  static std::mutex _state_lock;
+  static bool _finalized;
+};
+
+} // end namespace hdfs
+#endif
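
A sketch of how an application could plug its own lock in through the interface above; the class and function names are illustrative only:

    #include "hdfspp/locks.h"

    #include <mutex>
    #include <string>

    // Wraps the mutex the application already uses to guard its own GSSAPI calls.
    class AppGssapiMutex : public hdfs::Mutex {
     public:
      void lock() override   { mtx_.lock(); }   // a real implementation may throw hdfs::LockFailure
      void unlock() override { mtx_.unlock(); }
      std::string str() override { return "AppGssapiMutex"; }
     private:
      std::mutex mtx_;
    };

    // Must run before any FileSystem is instantiated, and only once.
    void InitSharedLocks() {
      static AppGssapiMutex gssapi_mutex;
      hdfs::LockManager::InitLocks(&gssapi_mutex);
    }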

+ 60 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/log.h

@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_HDFS_LOG
+#define LIBHDFSPP_HDFS_LOG
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ *  Things that are part of the public API but are specific to logging live here.
+ *  Added to avoid including the whole public API into the implementation of the logger.
+ **/
+
+/* logging levels, compatible with enum in lib/common/logging.cc */
+#define HDFSPP_LOG_LEVEL_TRACE 0
+#define HDFSPP_LOG_LEVEL_DEBUG 1
+#define HDFSPP_LOG_LEVEL_INFO  2
+#define HDFSPP_LOG_LEVEL_WARN  3
+#define HDFSPP_LOG_LEVEL_ERROR 4
+
+/* components emitting messages, compatible with enum lib/common/logging.cc */
+#define HDFSPP_LOG_COMPONENT_UNKNOWN      1 << 0
+#define HDFSPP_LOG_COMPONENT_RPC          1 << 1
+#define HDFSPP_LOG_COMPONENT_BLOCKREADER  1 << 2
+#define HDFSPP_LOG_COMPONENT_FILEHANDLE   1 << 3
+#define HDFSPP_LOG_COMPONENT_FILESYSTEM   1 << 4
+
+/**
+ *  POD struct for C to consume (C++ interface gets to take advantage of RAII)
+ **/
+typedef struct {
+  const char *msg;
+  int level;
+  int component;
+  const char *file_name;
+  int file_line;
+} LogData;
+
+#ifdef __cplusplus
+} // end extern C
+#endif
+
+#endif
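
The LogData struct above is what a logging callback receives; a sketch of formatting one record follows. The registration hook lives in hdfspp/hdfs_ext.h, which is not shown here, so the callback signature is an assumption:

    #include "hdfspp/log.h"

    #include <cstdio>

    // Assumed signature: a sink receiving one LogData record per message.
    // The component bitmask is ignored in this sketch.
    extern "C" void my_log_sink(LogData *data) {
      const char *level = "UNKNOWN";
      switch (data->level) {
        case HDFSPP_LOG_LEVEL_TRACE: level = "TRACE"; break;
        case HDFSPP_LOG_LEVEL_DEBUG: level = "DEBUG"; break;
        case HDFSPP_LOG_LEVEL_INFO:  level = "INFO";  break;
        case HDFSPP_LOG_LEVEL_WARN:  level = "WARN";  break;
        case HDFSPP_LOG_LEVEL_ERROR: level = "ERROR"; break;
      }
      std::fprintf(stderr, "[%s] %s (%s:%d)\n",
                   level, data->msg, data->file_name, data->file_line);
    }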

+ 136 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/options.h

@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_OPTIONS_H_
+#define LIBHDFSPP_OPTIONS_H_
+
+#include "hdfspp/uri.h"
+
+#include <string>
+#include <vector>
+#include <map>
+
+namespace hdfs {
+
+
+struct NamenodeInfo {
+  NamenodeInfo(const std::string &nameservice_, const std::string &nodename_, const URI &uri_) :
+                nameservice(nameservice_), name(nodename_), uri(uri_) {}
+  NamenodeInfo(){}
+  //nameservice this belongs to
+  std::string nameservice;
+  //node name
+  std::string name;
+  //host:port
+  URI uri;
+
+  //get server hostname and port (aka service)
+  std::string get_host() const;
+  std::string get_port() const;
+};
+
+/**
+ * Options to control the behavior of the libhdfspp library.
+ **/
+struct Options {
+  /**
+   * Timeout for RPC requests in milliseconds.
+   * Default: 30000
+   **/
+  int rpc_timeout;
+  static const int kDefaultRpcTimeout = 30000;
+
+  /**
+   * Time to wait for an RPC connection before failing
+   * Default: 30000
+   **/
+  int rpc_connect_timeout;
+  static const int kDefaultRpcConnectTimeout = 30000;
+
+  /**
+   * Maximum number of retries for RPC operations
+   **/
+  int max_rpc_retries;
+  static const int kNoRetry = 0;
+  static const int kDefaultMaxRpcRetries = kNoRetry;
+
+  /**
+   * Number of ms to wait between retry of RPC operations
+   **/
+  int rpc_retry_delay_ms;
+  static const int kDefaultRpcRetryDelayMs = 10000;
+
+  /**
+   * Exclusion time for failed datanodes in milliseconds.
+   * Default: 600000
+   **/
+  unsigned int host_exclusion_duration;
+  static const unsigned int kDefaultHostExclusionDuration = 600000;
+
+  /**
+   * URI to connect to if no host:port is specified in Connect
+   */
+  URI defaultFS;
+
+  /**
+   * Namenodes used to provide HA for this cluster if applicable
+   **/
+  std::map<std::string, std::vector<NamenodeInfo>> services;
+
+
+  /**
+   * Client failover attempts before failover gives up
+   **/
+  int failover_max_retries;
+  static const unsigned int kDefaultFailoverMaxRetries = 4;
+
+  /**
+   * Client failover attempts before failover gives up if server
+   * connection is timing out.
+   **/
+  int failover_connection_max_retries;
+  static const unsigned int kDefaultFailoverConnectionMaxRetries = 0;
+
+  /*
+   * Which form of authentication to use with the server
+   * Default: simple
+   */
+  enum Authentication {
+      kSimple,
+      kKerberos
+  };
+  Authentication authentication;
+  static const Authentication kDefaultAuthentication = kSimple;
+
+  /**
+   * Block size in bytes.
+   * Default: 128 * 1024 * 1024 = 134217728
+   **/
+  long block_size;
+  static const long kDefaultBlockSize = 128*1024*1024;
+
+  /**
+   * Asio worker thread count
+   * Default: -1, which indicates the number of hardware threads
+   **/
+  int io_threads_;
+  static const int kDefaultIoThreads = -1;
+
+  Options();
+};
+}
+#endif
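
A sketch of adjusting a few of the knobs above before handing the struct to FileSystem::New; the specific values are illustrative only:

    #include "hdfspp/options.h"

    hdfs::Options MakeClientOptions() {
      hdfs::Options options;                       // the constructor fills in the defaults
      options.rpc_timeout = 15000;                 // fail individual RPCs after 15 seconds
      options.max_rpc_retries = 2;                 // retry twice instead of kNoRetry
      options.rpc_retry_delay_ms = 5000;           // wait 5 seconds between retries
      options.authentication = hdfs::Options::kKerberos;
      options.io_threads_ = 4;                     // fixed worker pool instead of the hardware default
      return options;
    }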

+ 59 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/statinfo.h

@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HDFSPP_STATINFO_H_
+#define HDFSPP_STATINFO_H_
+
+#include <string>
+
+namespace hdfs {
+
+/**
+ * Information that is assumed to be unchanging about a file for the duration of
+ * the operations.
+ */
+struct StatInfo {
+  enum FileType {
+    IS_DIR = 1,
+    IS_FILE = 2,
+    IS_SYMLINK = 3
+  };
+
+  int          file_type;
+  std::string  path;
+  std::string  full_path;
+  uint64_t     length;
+  uint64_t     permissions;  //Octal number as in POSIX permissions; e.g. 0777
+  std::string  owner;
+  std::string  group;
+  uint64_t     modification_time;
+  uint64_t     access_time;
+  std::string  symlink;
+  uint32_t     block_replication;
+  uint64_t     blocksize;
+  uint64_t     fileid;
+  uint64_t     children_num;
+
+  StatInfo();
+
+  //Converts StatInfo object to std::string (hdfs_ls format)
+  std::string str() const;
+};
+
+}
+
+#endif
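
A sketch of interpreting a StatInfo record, for example one filled in by FileSystem::GetFileInfo or GetListing:

    #include "hdfspp/statinfo.h"

    #include <iostream>

    void PrintEntry(const hdfs::StatInfo &si) {
      const char *kind = (si.file_type == hdfs::StatInfo::IS_DIR)  ? "dir"  :
                         (si.file_type == hdfs::StatInfo::IS_FILE) ? "file" : "symlink";
      std::cout << kind << ' '
                << std::oct << si.permissions << std::dec << ' '  // POSIX-style octal, e.g. 755
                << si.owner << ':' << si.group << ' '
                << si.length << " bytes  "
                << si.full_path << std::endl;
    }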

+ 111 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/status.h

@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_STATUS_H_
+#define LIBHDFSPP_STATUS_H_
+
+#include <string>
+#include <system_error>
+
+namespace hdfs {
+
+class Status {
+ public:
+  // Create a success status.
+  Status() : code_(0) {}
+
+  // Note: Avoid calling the Status constructors directly, call the factory methods instead
+
+  // Used for common status  types
+  Status(int code, const char *msg);
+  // Used for server side exceptions reported through RpcResponseProto and similar
+  Status(int code, const char *exception_class, const char *exception_details);
+
+  // Factory methods
+  static Status OK();
+  static Status InvalidArgument(const char *msg);
+  static Status ResourceUnavailable(const char *msg);
+  static Status Unimplemented();
+  static Status Exception(const char *exception_class_name, const char *exception_details);
+  static Status Error(const char *error_message);
+  static Status AuthenticationFailed();
+  static Status AuthenticationFailed(const char *msg);
+  static Status AuthorizationFailed();
+  static Status AuthorizationFailed(const char *msg);
+  static Status Canceled();
+  static Status PathNotFound(const char *msg);
+  static Status InvalidOffset(const char *msg);
+  static Status PathIsNotDirectory(const char *msg);
+  static Status MutexError(const char *msg);
+
+  // success
+  bool ok() const { return code_ == 0; }
+
+  bool is_invalid_offset() const { return code_ == kInvalidOffset; }
+
+  // contains ENOENT error
+  bool pathNotFound() const { return code_ == kPathNotFound; }
+
+  // Returns the string "OK" for success.
+  std::string ToString() const;
+
+  // get error code
+  int code() const { return code_; }
+
+  // if retry can possibly recover an error
+  bool notWorthRetry() const;
+
+  enum Code {
+    kOk = 0,
+    kInvalidArgument = static_cast<unsigned>(std::errc::invalid_argument),
+    kResourceUnavailable = static_cast<unsigned>(std::errc::resource_unavailable_try_again),
+    kUnimplemented = static_cast<unsigned>(std::errc::function_not_supported),
+    kOperationCanceled = static_cast<unsigned>(std::errc::operation_canceled),
+    kPermissionDenied = static_cast<unsigned>(std::errc::permission_denied),
+    kPathNotFound = static_cast<unsigned>(std::errc::no_such_file_or_directory),
+    kNotADirectory = static_cast<unsigned>(std::errc::not_a_directory),
+    kFileAlreadyExists = static_cast<unsigned>(std::errc::file_exists),
+    kPathIsNotEmptyDirectory = static_cast<unsigned>(std::errc::directory_not_empty),
+    kBusy = static_cast<unsigned>(std::errc::device_or_resource_busy),
+
+    // non-errc codes start at 256
+    kException = 256,
+    kAuthenticationFailed = 257,
+    kAccessControlException = 258,
+    kStandbyException = 259,
+    kSnapshotProtocolException = 260,
+    kInvalidOffset = 261,
+  };
+
+  std::string get_exception_class_str() const {
+    return exception_class_;
+  }
+
+  int get_server_exception_type() const {
+    return code_;
+  }
+
+ private:
+  int code_;
+  std::string msg_;
+
+  std::string exception_class_;
+};
+
+}
+
+#endif
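
A sketch of handling a Status returned by one of the synchronous FileSystem calls:

    #include "hdfspp/status.h"

    #include <iostream>

    bool HandleStatus(const hdfs::Status &status) {
      if (status.ok())
        return true;
      if (status.pathNotFound()) {                 // the ENOENT-style code
        std::cerr << "missing path: " << status.ToString() << std::endl;
        return false;
      }
      if (status.notWorthRetry()) {                // e.g. invalid argument or failed auth
        std::cerr << "permanent failure: " << status.ToString() << std::endl;
        return false;
      }
      std::cerr << "transient failure (code " << status.code()
                << "), the caller may retry" << std::endl;
      return false;
    }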

+ 137 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/include/hdfspp/uri.h

@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_URI_H_
+#define COMMON_HDFS_URI_H_
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+namespace hdfs
+{
+
+class uri_parse_error : public std::invalid_argument {
+ public:
+  uri_parse_error(const char *what_str) : std::invalid_argument(what_str) {}
+  uri_parse_error(const std::string& what_str) : std::invalid_argument(what_str) {}
+};
+
+class URI {
+public:
+  // Parse a string into a URI.  Throw a hdfs::uri_parse_error if URI is malformed.
+  static URI parse_from_string(const std::string &str);
+
+  // URI encode/decode strings
+  static std::string encode  (const std::string &input);
+  static std::string decode  (const std::string &input);
+
+  URI();
+
+  std::string get_scheme(bool encoded_output=false) const;
+
+  void set_scheme(const std::string &s, bool encoded_input=false);
+
+  // empty if none.
+  std::string get_host(bool encoded_output=false) const;
+
+  void set_host(const std::string& h, bool encoded_input=false);
+
+  // true if port has been set
+  bool has_port() const;
+
+  // undefined if port hasn't been set
+  uint16_t get_port() const;
+
+  // use default if port hasn't been set
+  uint16_t get_port_or_default(uint16_t default_val) const;
+
+  void set_port(uint16_t p);
+
+  void clear_port();
+
+  std::string get_path(bool encoded_output=false) const;
+
+  void set_path(const std::string &p, bool encoded_input=false);
+
+  void add_path(const std::string &p, bool encoded_input=false);
+
+  std::vector<std::string> get_path_elements(bool encoded_output=false) const;
+
+  struct Query {
+    Query(const std::string& key, const std::string& val);
+    std::string key;
+    std::string value;
+  };
+
+  std::string get_query(bool encoded_output=false) const;
+
+  std::vector<Query> get_query_elements(bool encoded_output=false) const;
+
+  // Note that set_query must always be passed encoded strings
+  void set_query(const std::string &q);
+
+  // Adds a parameter onto the query; does not check if it already exists
+  //   e.g. parseFromString("foo?bar=baz").addQuery("bing","bang")
+  //   would leave "bar=baz&bing=bang" as the query
+  void add_query(const std::string &name, const std::string & value, bool encoded_input=false);
+
+  // Removes the query part if exists
+  //   e.g. parseFromString("foo?bar=baz&bing=bang&bar=bong").removeQueries("bar")
+  //   would leave bing=bang as the query
+  void remove_query(const std::string &q_name, bool encoded_input=false);
+
+  std::string get_fragment(bool encoded_output=false) const;
+
+  void set_fragment(const std::string &f, bool encoded_input=false);
+
+  std::string str(bool encoded_output=true) const;
+
+  // Get a string with each URI field printed on a separate line
+  std::string GetDebugString() const;
+private:
+  // These are stored in encoded form
+  std::string scheme;
+  std::string user;
+  std::string pass;
+  std::string host;
+  std::vector<std::string> path;
+  std::vector<Query> queries;
+  std::string fragment;
+  // implicitly narrowed to uint16_t if positive
+  // -1 to indicate uninitialized
+  int32_t _port;
+
+  // URI encoding helpers
+  static std::string from_encoded(bool encoded_output, const std::string & input);
+  static std::string to_encoded(bool encoded_input, const std::string & input);
+
+  bool has_authority() const;
+  std::string build_authority(bool encoded_output) const;
+
+  std::string build_path(bool encoded_output) const;
+  void parse_path(bool input_encoded, const std::string &input_path);
+};
+
+inline std::ostream& operator<<(std::ostream &out, const URI &uri) {
+  return out << uri.str();
+}
+
+}
+#endif
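
A sketch of round-tripping an HDFS URI through the parser declared above; the example URI is a placeholder:

    #include "hdfspp/uri.h"

    #include <iostream>

    void ParseExample() {
      try {
        hdfs::URI uri =
            hdfs::URI::parse_from_string("hdfs://nn.example.com:8020/tmp/foo?op=stat");
        std::cout << uri.get_scheme() << ' '                 // "hdfs"
                  << uri.get_host() << ' '                   // "nn.example.com"
                  << uri.get_port_or_default(8020) << ' '    // 8020
                  << uri.get_path() << std::endl;            // "/tmp/foo"
        uri.add_query("user", "hdfs");
        std::cout << uri.str() << std::endl;
      } catch (const hdfs::uri_parse_error &e) {
        std::cerr << "malformed URI: " << e.what() << std::endl;
      }
    }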

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/CMakeLists.txt

@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(common)
+add_subdirectory(fs)
+add_subdirectory(reader)
+add_subdirectory(rpc)
+add_subdirectory(proto)
+add_subdirectory(connection)
+add_subdirectory(bindings)

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/CMakeLists.txt

@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+add_subdirectory(c)

+ 21 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/CMakeLists.txt

@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+add_library(bindings_c_obj OBJECT hdfs.cc)
+add_dependencies(bindings_c_obj fs rpc reader proto common fs rpc reader proto common)
+add_library(bindings_c $<TARGET_OBJECTS:bindings_c_obj>)

+ 2007 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/bindings/c/hdfs.cc

@@ -0,0 +1,2007 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/hdfspp.h"
+
+#include "fs/filesystem.h"
+#include "common/hdfs_configuration.h"
+#include "common/configuration_loader.h"
+#include "common/logging.h"
+
+#include <hdfs/hdfs.h>
+#include <hdfspp/hdfs_ext.h>
+
+#include <libgen.h>
+#include "limits.h"
+
+#include <string>
+#include <cstring>
+#include <iostream>
+#include <algorithm>
+#include <functional>
+
+using namespace hdfs;
+using std::experimental::nullopt;
+using namespace std::placeholders;
+
+static constexpr tPort kDefaultPort = 8020;
+
+/** Annotate which parts of the code below are implementations of API functions
+ *  and if they are normal vs. extended API.
+ */
+#define LIBHDFS_C_API
+#define LIBHDFSPP_EXT_API
+
+/* Separate the handles used by the C API from the C++ API */
+struct hdfs_internal {
+  hdfs_internal(FileSystem *p) : filesystem_(p), working_directory_("/") {}
+  hdfs_internal(std::unique_ptr<FileSystem> p)
+      : filesystem_(std::move(p)), working_directory_("/") {}
+  virtual ~hdfs_internal(){};
+  FileSystem *get_impl() { return filesystem_.get(); }
+  const FileSystem *get_impl() const { return filesystem_.get(); }
+  std::string get_working_directory() {
+    std::lock_guard<std::mutex> read_guard(wd_lock_);
+    return working_directory_;
+  }
+  void set_working_directory(std::string new_directory) {
+    std::lock_guard<std::mutex> write_guard(wd_lock_);
+    working_directory_ = new_directory;
+  }
+
+ private:
+  std::unique_ptr<FileSystem> filesystem_;
+  std::string working_directory_;      //has to always start and end with '/'
+  std::mutex wd_lock_;                 //synchronize access to the working directory
+};
+
+struct hdfsFile_internal {
+  hdfsFile_internal(FileHandle *p) : file_(p) {}
+  hdfsFile_internal(std::unique_ptr<FileHandle> p) : file_(std::move(p)) {}
+  virtual ~hdfsFile_internal(){};
+  FileHandle *get_impl() { return file_.get(); }
+  const FileHandle *get_impl() const { return file_.get(); }
+
+ private:
+  std::unique_ptr<FileHandle> file_;
+};
+
+/* Keep thread local copy of last error string */
+thread_local std::string errstr;
+
+/* Fetch last error that happened in this thread */
+LIBHDFSPP_EXT_API
+int hdfsGetLastError(char *buf, int len) {
+  //No error message
+  if(errstr.empty()){
+    return -1;
+  }
+
+  //There is an error, but no room for the error message to be copied to
+  if(nullptr == buf || len < 1) {
+    return -1;
+  }
+
+  /* leave space for a trailing null */
+  size_t copylen = std::min((size_t)errstr.size(), (size_t)len);
+  if(copylen == (size_t)len) {
+    copylen--;
+  }
+
+  strncpy(buf, errstr.c_str(), copylen);
+
+  /* stick in null */
+  buf[copylen] = 0;
+
+  return 0;
+}
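/* Usage sketch: after any call in this file reports failure, the caller can
 * fetch the thread-local message into its own buffer, e.g.
 *
 *   char err[4096];
 *   if (hdfsExists(fs, "/no/such/path") != 0 &&
 *       hdfsGetLastError(err, sizeof(err)) == 0) {
 *     fprintf(stderr, "hdfs error: %s\n", err);
 *   }
 */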
+
+/* Event callbacks for next open calls */
+thread_local std::experimental::optional<fs_event_callback> fsEventCallback;
+thread_local std::experimental::optional<file_event_callback> fileEventCallback;
+
+struct hdfsBuilder {
+  hdfsBuilder();
+  hdfsBuilder(const char * directory);
+  virtual ~hdfsBuilder() {}
+  ConfigurationLoader loader;
+  HdfsConfiguration config;
+
+  optional<std::string> overrideHost;
+  optional<tPort>       overridePort;
+  optional<std::string> user;
+
+  static constexpr tPort kUseDefaultPort = 0;
+};
+
+/* Error handling with optional debug to stderr */
+static void ReportError(int errnum, const std::string & msg) {
+  errno = errnum;
+  errstr = msg;
+#ifdef LIBHDFSPP_C_API_ENABLE_DEBUG
+  std::cerr << "Error: errno=" << strerror(errnum) << " message=\"" << msg
+            << "\"" << std::endl;
+#else
+  (void)msg;
+#endif
+}
+
+/* Convert Status wrapped error into appropriate errno and return code */
+static int Error(const Status &stat) {
+  const char * default_message;
+  int errnum;
+
+  int code = stat.code();
+  switch (code) {
+    case Status::Code::kOk:
+      return 0;
+    case Status::Code::kInvalidArgument:
+      errnum = EINVAL;
+      default_message = "Invalid argument";
+      break;
+    case Status::Code::kResourceUnavailable:
+      errnum = EAGAIN;
+      default_message = "Resource temporarily unavailable";
+      break;
+    case Status::Code::kUnimplemented:
+      errnum = ENOSYS;
+      default_message = "Function not implemented";
+      break;
+    case Status::Code::kException:
+      errnum = EINTR;
+      default_message = "Exception raised";
+      break;
+    case Status::Code::kOperationCanceled:
+      errnum = EINTR;
+      default_message = "Operation canceled";
+      break;
+    case Status::Code::kPermissionDenied:
+      errnum = EACCES;
+      default_message = "Permission denied";
+      break;
+    case Status::Code::kPathNotFound:
+      errnum = ENOENT;
+      default_message = "No such file or directory";
+      break;
+    case Status::Code::kNotADirectory:
+      errnum = ENOTDIR;
+      default_message = "Not a directory";
+      break;
+    case Status::Code::kFileAlreadyExists:
+      errnum = EEXIST;
+      default_message = "File already exists";
+      break;
+    case Status::Code::kPathIsNotEmptyDirectory:
+      errnum = ENOTEMPTY;
+      default_message = "Directory is not empty";
+      break;
+    case Status::Code::kInvalidOffset:
+      errnum = Status::Code::kInvalidOffset;
+      default_message = "Trying to begin a read past the EOF";
+      break;
+    default:
+      errnum = ENOSYS;
+      default_message = "Error: unrecognised code";
+  }
+  if (stat.ToString().empty())
+    ReportError(errnum, default_message);
+  else
+    ReportError(errnum, stat.ToString());
+  return -1;
+}
+
+static int ReportException(const std::exception & e)
+{
+  return Error(Status::Exception("Uncaught exception", e.what()));
+}
+
+static int ReportCaughtNonException()
+{
+  return Error(Status::Exception("Uncaught value not derived from std::exception", ""));
+}
+
+/* return false on failure */
+bool CheckSystem(hdfsFS fs) {
+  if (!fs) {
+    ReportError(ENODEV, "Cannot perform FS operations with null FS handle.");
+    return false;
+  }
+
+  return true;
+}
+
+/* return false on failure */
+bool CheckHandle(hdfsFile file) {
+  if (!file) {
+    ReportError(EBADF, "Cannot perform FS operations with null File handle.");
+    return false;
+  }
+  return true;
+}
+
+/* return false on failure */
+bool CheckSystemAndHandle(hdfsFS fs, hdfsFile file) {
+  if (!CheckSystem(fs))
+    return false;
+
+  if (!CheckHandle(file))
+    return false;
+
+  return true;
+}
+
+optional<std::string> getAbsolutePath(hdfsFS fs, const char* path) {
+  //Does not support . (dot) and .. (double dot) semantics
+  if (!path || path[0] == '\0') {
+    Error(Status::InvalidArgument("getAbsolutePath: argument 'path' cannot be NULL or empty"));
+    return optional<std::string>();
+  }
+  if (path[0] != '/') {
+    //we know that working directory always ends with '/'
+    return fs->get_working_directory().append(path);
+  }
+  return optional<std::string>(path);
+}
+
+/**
+ * C API implementations
+ **/
+
+LIBHDFS_C_API
+int hdfsFileIsOpenForRead(hdfsFile file) {
+  /* files can only be open for reads at the moment, do a quick check */
+  if (!CheckHandle(file)){
+    return 0;
+  }
+  return 1; // Update implementation when we get file writing
+}
+
+LIBHDFS_C_API
+int hdfsFileIsOpenForWrite(hdfsFile file) {
+  /* files can only be open for reads at the moment; writing is unsupported, so report failure */
+  CheckHandle(file);
+  return -1; // Update implementation when we get file writing
+}
+
+int hdfsConfGetLong(const char *key, int64_t *val)
+{
+  try
+  {
+    errno = 0;
+    hdfsBuilder builder;
+    return hdfsBuilderConfGetLong(&builder, key, val);
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+hdfsFS doHdfsConnect(optional<std::string> nn, optional<tPort> port, optional<std::string> user, const Options & options) {
+  try
+  {
+    errno = 0;
+    IoService * io_service = IoService::New();
+
+    FileSystem *fs = FileSystem::New(io_service, user.value_or(""), options);
+    if (!fs) {
+      ReportError(ENODEV, "Could not create FileSystem object");
+      return nullptr;
+    }
+
+    if (fsEventCallback) {
+      fs->SetFsEventCallback(fsEventCallback.value());
+    }
+
+    Status status;
+    if (nn || port) {
+      if (!port) {
+        port = kDefaultPort;
+      }
+      std::string port_as_string = std::to_string(*port);
+      status = fs->Connect(nn.value_or(""), port_as_string);
+    } else {
+      status = fs->ConnectToDefaultFs();
+    }
+
+    if (!status.ok()) {
+      Error(status);
+
+      // FileSystem's ctor might take ownership of the io_service; if it does,
+      //    it will null out the pointer
+      if (io_service)
+        delete io_service;
+
+      delete fs;
+
+      return nullptr;
+    }
+    return new hdfs_internal(fs);
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFSPP_EXT_API
+hdfsFS hdfsAllocateFileSystem(struct hdfsBuilder *bld) {
+  // Same idea as the first half of doHdfsConnect, but return the wrapped FS before
+  // connecting.
+  try {
+    errno = 0;
+    std::shared_ptr<IoService> io_service = IoService::MakeShared();
+
+    int io_thread_count = bld->config.GetOptions().io_threads_;
+    if(io_thread_count < 1) {
+      io_service->InitDefaultWorkers();
+    } else {
+      io_service->InitWorkers(io_thread_count);
+    }
+
+    FileSystem *fs = FileSystem::New(io_service, bld->user.value_or(""), bld->config.GetOptions());
+    if (!fs) {
+      ReportError(ENODEV, "Could not create FileSystem object");
+      return nullptr;
+    }
+
+    if (fsEventCallback) {
+      fs->SetFsEventCallback(fsEventCallback.value());
+    }
+
+    return new hdfs_internal(fs);
+  } catch (const std::exception &e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+  return nullptr;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsConnectAllocated(hdfsFS fs, struct hdfsBuilder *bld) {
+  if(!CheckSystem(fs)) {
+    return ENODEV;
+  }
+
+  if(!bld) {
+    ReportError(ENODEV, "No hdfsBuilder object supplied");
+    return ENODEV;
+  }
+
+  // Get C++ FS to do connect
+  FileSystem *fsImpl = fs->get_impl();
+  if(!fsImpl) {
+    ReportError(ENODEV, "Null FileSystem implementation");
+    return ENODEV;
+  }
+
+  // Unpack the required bits of the hdfsBuilder
+  optional<std::string> nn = bld->overrideHost;
+  optional<tPort> port = bld->overridePort;
+  optional<std::string> user = bld->user;
+
+  // try-catch in case some of the third-party stuff throws
+  try {
+    Status status;
+    if (nn || port) {
+      if (!port) {
+        port = kDefaultPort;
+      }
+      std::string port_as_string = std::to_string(*port);
+      status = fsImpl->Connect(nn.value_or(""), port_as_string);
+    } else {
+      status = fsImpl->ConnectToDefaultFs();
+    }
+
+    if (!status.ok()) {
+      Error(status);
+      return ENODEV;
+    }
+
+    // 0 to indicate a good connection
+    return 0;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return ENODEV;
+  } catch (...) {
+    ReportCaughtNonException();
+    return ENODEV;
+  }
+
+  return 0;
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnect(const char *nn, tPort port) {
+  return hdfsConnectAsUser(nn, port, "");
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnectAsUser(const char* nn, tPort port, const char *user) {
+  return doHdfsConnect(std::string(nn), port, std::string(user), Options());
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnectAsUserNewInstance(const char* nn, tPort port, const char *user ) {
+  //libhdfspp always returns a new instance
+  return doHdfsConnect(std::string(nn), port, std::string(user), Options());
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsConnectNewInstance(const char* nn, tPort port) {
+  //libhdfspp always returns a new instance
+  return hdfsConnectAsUser(nn, port, "");
+}
+
+LIBHDFSPP_EXT_API
+int hdfsCancelPendingConnection(hdfsFS fs) {
+  // todo: stick an enum in hdfs_internal to check the connect state
+  if(!CheckSystem(fs)) {
+    return ENODEV;
+  }
+
+  FileSystem *fsImpl = fs->get_impl();
+  if(!fsImpl) {
+    ReportError(ENODEV, "Null FileSystem implementation");
+    return ENODEV;
+  }
+
+  bool canceled = fsImpl->CancelPendingConnect();
+  if(canceled) {
+    return 0;
+  } else {
+    return EINTR;
+  }
+}
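/* Usage sketch for the allocate/connect/cancel trio above. hdfsNewBuilder and
 * hdfsFreeBuilder are assumed to be the usual libhdfs builder entry points
 * (they are not shown in this excerpt):
 *
 *   struct hdfsBuilder *bld = hdfsNewBuilder();
 *   hdfsFS fs = hdfsAllocateFileSystem(bld);
 *   // A watchdog thread holding `fs` may call hdfsCancelPendingConnection(fs)
 *   // if the connection takes too long.
 *   if (hdfsConnectAllocated(fs, bld) != 0) {
 *     hdfsDisconnect(fs);
 *     fs = NULL;
 *   }
 *   hdfsFreeBuilder(bld);
 */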
+
+LIBHDFS_C_API
+int hdfsDisconnect(hdfsFS fs) {
+  try
+  {
+    errno = 0;
+    if (!fs) {
+      ReportError(ENODEV, "Cannot disconnect null FS handle.");
+      return -1;
+    }
+
+    delete fs;
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+hdfsFile hdfsOpenFile(hdfsFS fs, const char *path, int flags, int bufferSize,
+                      short replication, tSize blocksize) {
+  try
+  {
+    errno = 0;
+    (void)flags;
+    (void)bufferSize;
+    (void)replication;
+    (void)blocksize;
+    if (!fs) {
+      ReportError(ENODEV, "Cannot perform FS operations with null FS handle.");
+      return nullptr;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return nullptr;
+    }
+    FileHandle *f = nullptr;
+    Status stat = fs->get_impl()->Open(*abs_path, &f);
+    if (!stat.ok()) {
+      Error(stat);
+      return nullptr;
+    }
+    if (f && fileEventCallback) {
+      f->SetFileEventCallback(fileEventCallback.value());
+    }
+    return new hdfsFile_internal(f);
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsCloseFile(hdfsFS fs, hdfsFile file) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+    delete file;
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
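/* Usage sketch of the plain C entry points above; host, port, and path are
 * placeholders. Flags, buffer size, replication, and block size are currently
 * ignored by this hdfsOpenFile implementation:
 *
 *   hdfsFS fs = hdfsConnectAsUser("nn.example.com", 8020, "hdfs");
 *   if (fs) {
 *     hdfsFile f = hdfsOpenFile(fs, "/tmp/foo/bar", 0, 0, 0, 0);
 *     if (f) {
 *       // reads would go through the usual libhdfs read calls (not shown here)
 *       hdfsCloseFile(fs, f);
 *     }
 *     hdfsDisconnect(fs);
 *   }
 */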
+
+LIBHDFS_C_API
+char* hdfsGetWorkingDirectory(hdfsFS fs, char *buffer, size_t bufferSize) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return nullptr;
+    }
+    std::string wd = fs->get_working_directory();
+    size_t size = wd.size();
+    if (size + 1 > bufferSize) {
+      std::stringstream ss;
+      ss << "hdfsGetWorkingDirectory: bufferSize is " << bufferSize <<
+          ", which is not enough to fit working directory of size " << (size + 1);
+      Error(Status::InvalidArgument(ss.str().c_str()));
+      return nullptr;
+    }
+    wd.copy(buffer, size);
+    buffer[size] = '\0';
+    return buffer;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsSetWorkingDirectory(hdfsFS fs, const char* path) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    //Enforce last character to be '/'
+    std::string withSlash = *abs_path;
+    char last = withSlash.back();
+    if (last != '/'){
+      withSlash += '/';
+    }
+    fs->set_working_directory(withSlash);
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsAvailable(hdfsFS fs, hdfsFile file) {
+  //Since we do not have read-ahead implemented, return 0 if fs and file are good.
+  errno = 0;
+  if (!CheckSystemAndHandle(fs, file)) {
+    return -1;
+  }
+  return 0;
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetDefaultBlockSize(hdfsFS fs) {
+  try {
+    errno = 0;
+    return fs->get_impl()->get_options().block_size;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetDefaultBlockSizeAtPath(hdfsFS fs, const char *path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    uint64_t block_size;
+    Status stat = fs->get_impl()->GetPreferredBlockSize(*abs_path, block_size);
+    if (!stat.ok()) {
+      if (stat.pathNotFound()){
+        return fs->get_impl()->get_options().block_size;
+      } else {
+        return Error(stat);
+      }
+    }
+    return block_size;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+LIBHDFS_C_API
+int hdfsSetReplication(hdfsFS fs, const char* path, int16_t replication) {
+    try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      if(replication < 1){
+        return Error(Status::InvalidArgument("SetReplication: argument 'replication' cannot be less than 1"));
+      }
+      Status stat;
+      stat = fs->get_impl()->SetReplication(*abs_path, replication);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime) {
+    try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      Status stat;
+      stat = fs->get_impl()->SetTimes(*abs_path, mtime, atime);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetCapacity(hdfsFS fs) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+
+    hdfs::FsInfo fs_info;
+    Status stat = fs->get_impl()->GetFsStats(fs_info);
+    if (!stat.ok()) {
+      Error(stat);
+      return -1;
+    }
+    return fs_info.capacity;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+LIBHDFS_C_API
+tOffset hdfsGetUsed(hdfsFS fs) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+
+    hdfs::FsInfo fs_info;
+    Status stat = fs->get_impl()->GetFsStats(fs_info);
+    if (!stat.ok()) {
+      Error(stat);
+      return -1;
+    }
+    return fs_info.used;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return -1;
+  } catch (...) {
+    ReportCaughtNonException();
+    return -1;
+  }
+}
+
+void StatInfoToHdfsFileInfo(hdfsFileInfo * file_info,
+                            const hdfs::StatInfo & stat_info) {
+  /* file or directory */
+  if (stat_info.file_type == StatInfo::IS_DIR) {
+    file_info->mKind = kObjectKindDirectory;
+  } else if (stat_info.file_type == StatInfo::IS_FILE) {
+    file_info->mKind = kObjectKindFile;
+  } else {
+    file_info->mKind = kObjectKindFile;
+    LOG_WARN(kFileSystem, << "Symlink is not supported! Reporting as a file: ");
+  }
+
+  /* the name of the file */
+  char copyOfPath[PATH_MAX];
+  strncpy(copyOfPath, stat_info.path.c_str(), PATH_MAX);
+  copyOfPath[PATH_MAX - 1] = '\0'; // in case strncpy ran out of space
+
+  char * mName = basename(copyOfPath);
+  size_t mName_size = strlen(mName);
+  file_info->mName = new char[mName_size+1];
+  strncpy(file_info->mName, basename(copyOfPath), mName_size + 1);
+
+  /* the last modification time for the file in seconds */
+  file_info->mLastMod = (tTime) stat_info.modification_time;
+
+  /* the size of the file in bytes */
+  file_info->mSize = (tOffset) stat_info.length;
+
+  /* the count of replicas */
+  file_info->mReplication = (short) stat_info.block_replication;
+
+  /* the block size for the file */
+  file_info->mBlockSize = (tOffset) stat_info.blocksize;
+
+  /* the owner of the file */
+  file_info->mOwner = new char[stat_info.owner.size() + 1];
+  strncpy(file_info->mOwner, stat_info.owner.c_str(), stat_info.owner.size() + 1);
+
+  /* the group associated with the file */
+  file_info->mGroup = new char[stat_info.group.size() + 1];
+  strncpy(file_info->mGroup, stat_info.group.c_str(), stat_info.group.size() + 1);
+
+  /* the permissions associated with the file encoded as an octal number (0777)*/
+  file_info->mPermissions = (short) stat_info.permissions;
+
+  /* the last access time for the file in seconds since the epoch*/
+  file_info->mLastAccess = stat_info.access_time;
+}
+
+LIBHDFS_C_API
+int hdfsExists(hdfsFS fs, const char *path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    hdfs::StatInfo stat_info;
+    Status stat = fs->get_impl()->GetFileInfo(*abs_path, stat_info);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+       return nullptr;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return nullptr;
+    }
+    hdfs::StatInfo stat_info;
+    Status stat = fs->get_impl()->GetFileInfo(*abs_path, stat_info);
+    if (!stat.ok()) {
+      Error(stat);
+      return nullptr;
+    }
+    hdfsFileInfo *file_info = new hdfsFileInfo[1];
+    StatInfoToHdfsFileInfo(file_info, stat_info);
+    return file_info;
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+hdfsFileInfo *hdfsListDirectory(hdfsFS fs, const char* path, int *numEntries) {
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        *numEntries = 0;
+        return nullptr;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        *numEntries = 0;
+        return nullptr;
+      }
+      std::vector<StatInfo> stat_infos;
+      Status stat = fs->get_impl()->GetListing(*abs_path, &stat_infos);
+      if (!stat.ok()) {
+        Error(stat);
+        *numEntries = 0;
+        return nullptr;
+      }
+      if(stat_infos.empty()){
+        *numEntries = 0;
+        return nullptr;
+      }
+      *numEntries = stat_infos.size();
+      hdfsFileInfo *file_infos = new hdfsFileInfo[stat_infos.size()];
+      for(std::vector<StatInfo>::size_type i = 0; i < stat_infos.size(); i++) {
+        StatInfoToHdfsFileInfo(&file_infos[i], stat_infos.at(i));
+      }
+
+      return file_infos;
+    } catch (const std::exception & e) {
+      ReportException(e);
+      *numEntries = 0;
+      return nullptr;
+    } catch (...) {
+      ReportCaughtNonException();
+      *numEntries = 0;
+      return nullptr;
+    }
+}
+
+LIBHDFS_C_API
+void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries)
+{
+    errno = 0;
+    int i;
+    for (i = 0; i < numEntries; ++i) {
+        delete[] hdfsFileInfo[i].mName;
+        delete[] hdfsFileInfo[i].mOwner;
+        delete[] hdfsFileInfo[i].mGroup;
+    }
+    delete[] hdfsFileInfo;
+}
+
+LIBHDFS_C_API
+int hdfsCreateDirectory(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    // Use default permissions and set true to create all non-existent parent directories
+    stat = fs->get_impl()->Mkdirs(*abs_path, FileSystem::GetDefaultPermissionMask(), true);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsDelete(hdfsFS fs, const char* path, int recursive) {
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      Status stat;
+      stat = fs->get_impl()->Delete(*abs_path, recursive);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> old_abs_path = getAbsolutePath(fs, oldPath);
+    const optional<std::string> new_abs_path = getAbsolutePath(fs, newPath);
+    if(!old_abs_path || !new_abs_path) {
+      return -1;
+    }
+    Status stat;
+    stat = fs->get_impl()->Rename(*old_abs_path, *new_abs_path);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsChmod(hdfsFS fs, const char* path, short mode){
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      Status stat = FileSystem::CheckValidPermissionMask(mode);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      stat = fs->get_impl()->SetPermission(*abs_path, mode);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsChown(hdfsFS fs, const char* path, const char *owner, const char *group){
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return -1;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return -1;
+      }
+      std::string own = (owner) ? owner : "";
+      std::string grp = (group) ? group : "";
+
+      Status stat;
+      stat = fs->get_impl()->SetOwner(*abs_path, own, grp);
+      if (!stat.ok()) {
+        return Error(stat);
+      }
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFSPP_EXT_API
+hdfsFileInfo * hdfsFind(hdfsFS fs, const char* path, const char* name, uint32_t * numEntries){
+  try {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        *numEntries = 0;
+        return nullptr;
+      }
+
+      std::vector<StatInfo>  stat_infos;
+      Status stat = fs->get_impl()->Find(path, name, hdfs::FileSystem::GetDefaultFindMaxDepth(), &stat_infos);
+      if (!stat.ok()) {
+        Error(stat);
+        *numEntries = 0;
+        return nullptr;
+      }
+      //Existing API expects nullptr if size is 0
+      if(stat_infos.empty()){
+        *numEntries = 0;
+        return nullptr;
+      }
+      *numEntries = stat_infos.size();
+      hdfsFileInfo *file_infos = new hdfsFileInfo[stat_infos.size()];
+      for(std::vector<StatInfo>::size_type i = 0; i < stat_infos.size(); i++) {
+        StatInfoToHdfsFileInfo(&file_infos[i], stat_infos.at(i));
+      }
+
+      return file_infos;
+    } catch (const std::exception & e) {
+      ReportException(e);
+      *numEntries = 0;
+      return nullptr;
+    } catch (...) {
+      ReportCaughtNonException();
+      *numEntries = 0;
+      return nullptr;
+    }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsCreateSnapshot(hdfsFS fs, const char* path, const char* name) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    if(!name){
+      stat = fs->get_impl()->CreateSnapshot(*abs_path, "");
+    } else {
+      stat = fs->get_impl()->CreateSnapshot(*abs_path, name);
+    }
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsDeleteSnapshot(hdfsFS fs, const char* path, const char* name) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    if (!name) {
+      return Error(Status::InvalidArgument("hdfsDeleteSnapshot: argument 'name' cannot be NULL"));
+    }
+    Status stat;
+    stat = fs->get_impl()->DeleteSnapshot(*abs_path, name);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsRenameSnapshot(hdfsFS fs, const char* path, const char* old_name, const char* new_name) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    if (!old_name) {
+      return Error(Status::InvalidArgument("hdfsRenameSnapshot: argument 'old_name' cannot be NULL"));
+    }
+    if (!new_name) {
+      return Error(Status::InvalidArgument("hdfsRenameSnapshot: argument 'new_name' cannot be NULL"));
+    }
+    Status stat;
+    stat = fs->get_impl()->RenameSnapshot(*abs_path, old_name, new_name);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsAllowSnapshot(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    stat = fs->get_impl()->AllowSnapshot(*abs_path);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsDisallowSnapshot(hdfsFS fs, const char* path) {
+  try {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    Status stat;
+    stat = fs->get_impl()->DisallowSnapshot(*abs_path);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+tSize hdfsPread(hdfsFS fs, hdfsFile file, tOffset position, void *buffer,
+                tSize length) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    size_t len = 0;
+    Status stat = file->get_impl()->PositionRead(buffer, length, position, &len);
+    if(!stat.ok()) {
+      return Error(stat);
+    }
+    return (tSize)len;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+tSize hdfsRead(hdfsFS fs, hdfsFile file, void *buffer, tSize length) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    size_t len = 0;
+    Status stat = file->get_impl()->Read(buffer, length, &len);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    return (tSize)len;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsUnbufferFile(hdfsFile file) {
+  //Currently we are not doing any buffering
+  CheckHandle(file);
+  return -1;
+}
+
+LIBHDFS_C_API
+int hdfsFileGetReadStatistics(hdfsFile file, struct hdfsReadStatistics **stats) {
+  try
+    {
+      errno = 0;
+      if (!CheckHandle(file)) {
+        return -1;
+      }
+      *stats = new hdfsReadStatistics;
+      memset(*stats, 0, sizeof(hdfsReadStatistics));
+      (*stats)->totalBytesRead = file->get_impl()->get_bytes_read();
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int hdfsFileClearReadStatistics(hdfsFile file) {
+  try
+    {
+      errno = 0;
+      if (!CheckHandle(file)) {
+        return -1;
+      }
+      file->get_impl()->clear_bytes_read();
+      return 0;
+    } catch (const std::exception & e) {
+      return ReportException(e);
+    } catch (...) {
+      return ReportCaughtNonException();
+    }
+}
+
+LIBHDFS_C_API
+int64_t hdfsReadStatisticsGetRemoteBytesRead(const struct hdfsReadStatistics *stats) {
+    return stats->totalBytesRead - stats->totalLocalBytesRead;
+}
+
+LIBHDFS_C_API
+void hdfsFileFreeReadStatistics(struct hdfsReadStatistics *stats) {
+    errno = 0;
+    delete stats;
+}
+
+/* 0 on success, -1 on error*/
+LIBHDFS_C_API
+int hdfsSeek(hdfsFS fs, hdfsFile file, tOffset desiredPos) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    off_t desired = desiredPos;
+    Status stat = file->get_impl()->Seek(&desired, std::ios_base::beg);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+tOffset hdfsTell(hdfsFS fs, hdfsFile file) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+
+    off_t offset = 0;
+    Status stat = file->get_impl()->Seek(&offset, std::ios_base::cur);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    return (tOffset)offset;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+/* extended API */
+LIBHDFSPP_EXT_API
+int hdfsCancel(hdfsFS fs, hdfsFile file) {
+  try
+  {
+    errno = 0;
+    if (!CheckSystemAndHandle(fs, file)) {
+      return -1;
+    }
+    static_cast<FileHandleImpl*>(file->get_impl())->CancelOperations();
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsGetBlockLocations(hdfsFS fs, const char *path, struct hdfsBlockLocations ** locations_out)
+{
+  try
+  {
+    errno = 0;
+    if (!CheckSystem(fs)) {
+      return -1;
+    }
+    if (locations_out == nullptr) {
+      ReportError(EINVAL, "Null pointer passed to hdfsGetBlockLocations");
+      return -1;
+    }
+    const optional<std::string> abs_path = getAbsolutePath(fs, path);
+    if(!abs_path) {
+      return -1;
+    }
+    std::shared_ptr<FileBlockLocation> ppLocations;
+    Status stat = fs->get_impl()->GetBlockLocations(*abs_path, 0, std::numeric_limits<int64_t>::max(), &ppLocations);
+    if (!stat.ok()) {
+      return Error(stat);
+    }
+
+    hdfsBlockLocations *locations = new struct hdfsBlockLocations();
+    (*locations_out) = locations;
+
+    bzero(locations, sizeof(*locations));
+    locations->fileLength = ppLocations->getFileLength();
+    locations->isLastBlockComplete = ppLocations->isLastBlockComplete();
+    locations->isUnderConstruction = ppLocations->isUnderConstruction();
+
+    const std::vector<BlockLocation> & ppBlockLocations = ppLocations->getBlockLocations();
+    locations->num_blocks = ppBlockLocations.size();
+    locations->blocks = new struct hdfsBlockInfo[locations->num_blocks];
+    for (size_t i=0; i < ppBlockLocations.size(); i++) {
+      auto ppBlockLocation = ppBlockLocations[i];
+      auto block = &locations->blocks[i];
+
+      block->num_bytes = ppBlockLocation.getLength();
+      block->start_offset = ppBlockLocation.getOffset();
+
+      const std::vector<DNInfo> & ppDNInfos = ppBlockLocation.getDataNodes();
+      block->num_locations = ppDNInfos.size();
+      block->locations = new hdfsDNInfo[block->num_locations];
+      for (size_t j=0; j < block->num_locations; j++) {
+        auto ppDNInfo = ppDNInfos[j];
+        auto dn_info = &block->locations[j];
+
+        dn_info->xfer_port = ppDNInfo.getXferPort();
+        dn_info->info_port = ppDNInfo.getInfoPort();
+        dn_info->IPC_port  = ppDNInfo.getIPCPort();
+        dn_info->info_secure_port = ppDNInfo.getInfoSecurePort();
+
+        char * buf;
+        buf = new char[ppDNInfo.getHostname().size() + 1];
+        strncpy(buf, ppDNInfo.getHostname().c_str(), ppDNInfo.getHostname().size() + 1);
+        dn_info->hostname = buf;
+
+        buf = new char[ppDNInfo.getIPAddr().size() + 1];
+        strncpy(buf, ppDNInfo.getIPAddr().c_str(), ppDNInfo.getIPAddr().size() + 1);
+        dn_info->ip_address = buf;
+
+        buf = new char[ppDNInfo.getNetworkLocation().size() + 1];
+        strncpy(buf, ppDNInfo.getNetworkLocation().c_str(), ppDNInfo.getNetworkLocation().size() + 1);
+        dn_info->network_location = buf;
+      }
+    }
+
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsFreeBlockLocations(struct hdfsBlockLocations * blockLocations) {
+  errno = 0;
+  if (blockLocations == nullptr)
+    return 0;
+
+  for (size_t i=0; i < blockLocations->num_blocks; i++) {
+    auto block = &blockLocations->blocks[i];
+    for (size_t j=0; j < block->num_locations; j++) {
+      auto location = &block->locations[j];
+      delete[] location->hostname;
+      delete[] location->ip_address;
+      delete[] location->network_location;
+    }
+  }
+  delete[] blockLocations->blocks;
+  delete blockLocations;
+
+  return 0;
+}
+
+LIBHDFS_C_API
+char*** hdfsGetHosts(hdfsFS fs, const char* path, tOffset start, tOffset length) {
+  try
+    {
+      errno = 0;
+      if (!CheckSystem(fs)) {
+        return nullptr;
+      }
+      const optional<std::string> abs_path = getAbsolutePath(fs, path);
+      if(!abs_path) {
+        return nullptr;
+      }
+      std::shared_ptr<FileBlockLocation> ppLocations;
+      Status stat = fs->get_impl()->GetBlockLocations(*abs_path, start, length, &ppLocations);
+      if (!stat.ok()) {
+        Error(stat);
+        return nullptr;
+      }
+      const std::vector<BlockLocation> & ppBlockLocations = ppLocations->getBlockLocations();
+      char ***hosts = new char**[ppBlockLocations.size() + 1];
+      for (size_t i=0; i < ppBlockLocations.size(); i++) {
+        const std::vector<DNInfo> & ppDNInfos = ppBlockLocations[i].getDataNodes();
+        hosts[i] = new char*[ppDNInfos.size() + 1];
+        for (size_t j=0; j < ppDNInfos.size(); j++) {
+          auto ppDNInfo = ppDNInfos[j];
+          hosts[i][j] = new char[ppDNInfo.getHostname().size() + 1];
+          strncpy(hosts[i][j], ppDNInfo.getHostname().c_str(), ppDNInfo.getHostname().size() + 1);
+        }
+        hosts[i][ppDNInfos.size()] = nullptr;
+      }
+      hosts[ppBlockLocations.size()] = nullptr;
+      return hosts;
+    } catch (const std::exception & e) {
+      ReportException(e);
+      return nullptr;
+    } catch (...) {
+      ReportCaughtNonException();
+      return nullptr;
+    }
+}
+
+LIBHDFS_C_API
+void hdfsFreeHosts(char ***blockHosts) {
+  errno = 0;
+  if (blockHosts == nullptr)
+    return;
+
+  for (size_t i = 0; blockHosts[i]; i++) {
+    for (size_t j = 0; blockHosts[i][j]; j++) {
+      delete[] blockHosts[i][j];
+    }
+    delete[] blockHosts[i];
+  }
+  delete[] blockHosts;
+}
+
+/*******************************************************************
+ *                EVENT CALLBACKS
+ *******************************************************************/
+
+const char * FS_NN_CONNECT_EVENT = hdfs::FS_NN_CONNECT_EVENT;
+const char * FS_NN_READ_EVENT = hdfs::FS_NN_READ_EVENT;
+const char * FS_NN_WRITE_EVENT = hdfs::FS_NN_WRITE_EVENT;
+
+const char * FILE_DN_CONNECT_EVENT = hdfs::FILE_DN_CONNECT_EVENT;
+const char * FILE_DN_READ_EVENT = hdfs::FILE_DN_READ_EVENT;
+const char * FILE_DN_WRITE_EVENT = hdfs::FILE_DN_WRITE_EVENT;
+
+
+event_response fs_callback_glue(libhdfspp_fs_event_callback handler,
+                      int64_t cookie,
+                      const char * event,
+                      const char * cluster,
+                      int64_t value) {
+  int result = handler(event, cluster, value, cookie);
+  if (result == LIBHDFSPP_EVENT_OK) {
+    return event_response::make_ok();
+  }
+#ifndef LIBHDFSPP_SIMULATE_ERROR_DISABLED
+  if (result == DEBUG_SIMULATE_ERROR) {
+    return event_response::test_err(Status::Error("Simulated error"));
+  }
+#endif
+
+  return event_response::make_ok();
+}
+
+event_response file_callback_glue(libhdfspp_file_event_callback handler,
+                      int64_t cookie,
+                      const char * event,
+                      const char * cluster,
+                      const char * file,
+                      int64_t value) {
+  int result = handler(event, cluster, file, value, cookie);
+  if (result == LIBHDFSPP_EVENT_OK) {
+    return event_response::make_ok();
+  }
+#ifndef LIBHDFSPP_SIMULATE_ERROR_DISABLED
+  if (result == DEBUG_SIMULATE_ERROR) {
+    return event_response::test_err(Status::Error("Simulated error"));
+  }
+#endif
+
+  return event_response::make_ok();
+}
+
+LIBHDFSPP_EXT_API
+int hdfsPreAttachFSMonitor(libhdfspp_fs_event_callback handler, int64_t cookie)
+{
+  fs_event_callback callback = std::bind(fs_callback_glue, handler, cookie, _1, _2, _3);
+  fsEventCallback = callback;
+  return 0;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsPreAttachFileMonitor(libhdfspp_file_event_callback handler, int64_t cookie)
+{
+  file_event_callback callback = std::bind(file_callback_glue, handler, cookie, _1, _2, _3, _4);
+  fileEventCallback = callback;
+  return 0;
+}
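
Editorial aside: the glue functions above fix the expected callback shape — the handler receives the event name, the cluster, a 64-bit value, and the cookie supplied at registration, and returns LIBHDFSPP_EVENT_OK (or DEBUG_SIMULATE_ERROR in test builds). A minimal client-side sketch follows; it is not part of this patch, the counter is purely illustrative, and the hdfspp/hdfs_ext.h include path is an assumption.

#include "hdfspp/hdfs_ext.h"   // assumed location of the extended C API declarations
#include <atomic>
#include <cstring>

// Sketch only: count NameNode connect events observed by the library.
static std::atomic<int64_t> nn_connects{0};

static int fs_event_counter(const char *event, const char *cluster,
                            int64_t value, int64_t cookie) {
  (void)cluster; (void)value; (void)cookie;
  if (std::strcmp(event, FS_NN_CONNECT_EVENT) == 0)
    ++nn_connects;
  return LIBHDFSPP_EVENT_OK;   // never ask the library to simulate an error
}

// Must be installed before hdfsBuilderConnect so the new FileSystem picks it up:
//   hdfsPreAttachFSMonitor(fs_event_counter, /*cookie=*/0);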
+
+/*******************************************************************
+ *                BUILDER INTERFACE
+ *******************************************************************/
+
+HdfsConfiguration LoadDefault(ConfigurationLoader & loader)
+{
+  optional<HdfsConfiguration> result = loader.LoadDefaultResources<HdfsConfiguration>();
+  if (result)
+  {
+    return result.value();
+  }
+  else
+  {
+    return loader.NewConfig<HdfsConfiguration>();
+  }
+}
+
+hdfsBuilder::hdfsBuilder() : config(loader.NewConfig<HdfsConfiguration>())
+{
+  errno = 0;
+  config = LoadDefault(loader);
+}
+
+hdfsBuilder::hdfsBuilder(const char * directory) :
+      config(loader.NewConfig<HdfsConfiguration>())
+{
+  errno = 0;
+  loader.SetSearchPath(directory);
+  config = LoadDefault(loader);
+}
+
+LIBHDFS_C_API
+struct hdfsBuilder *hdfsNewBuilder(void)
+{
+  try
+  {
+    errno = 0;
+    return new struct hdfsBuilder();
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetNameNode(struct hdfsBuilder *bld, const char *nn)
+{
+  errno = 0;
+  bld->overrideHost = std::string(nn);
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetNameNodePort(struct hdfsBuilder *bld, tPort port)
+{
+  errno = 0;
+  bld->overridePort = port;
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetUserName(struct hdfsBuilder *bld, const char *userName)
+{
+  errno = 0;
+  if (userName && *userName) {
+    bld->user = std::string(userName);
+  }
+}
+
+LIBHDFS_C_API
+void hdfsBuilderSetForceNewInstance(struct hdfsBuilder *bld) {
+  //libhdfspp always returns a new instance, so nothing to do
+  (void)bld;
+  errno = 0;
+}
+
+LIBHDFS_C_API
+void hdfsFreeBuilder(struct hdfsBuilder *bld)
+{
+  try
+  {
+    errno = 0;
+    delete bld;
+  } catch (const std::exception & e) {
+    ReportException(e);
+  } catch (...) {
+    ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsBuilderConfSetStr(struct hdfsBuilder *bld, const char *key,
+                          const char *val)
+{
+  try
+  {
+    errno = 0;
+    optional<HdfsConfiguration> newConfig = bld->loader.OverlayValue(bld->config, key, val);
+    if (newConfig)
+    {
+      bld->config = newConfig.value();
+      return 0;
+    }
+    else
+    {
+      ReportError(EINVAL, "Could not change Builder value");
+      return -1;
+    }
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+void hdfsConfStrFree(char *val)
+{
+  errno = 0;
+  free(val);
+}
+
+LIBHDFS_C_API
+hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld) {
+  hdfsFS fs = doHdfsConnect(bld->overrideHost, bld->overridePort, bld->user, bld->config.GetOptions());
+  // Always free the builder
+  hdfsFreeBuilder(bld);
+  return fs;
+}
+
+LIBHDFS_C_API
+int hdfsConfGetStr(const char *key, char **val)
+{
+  try
+  {
+    errno = 0;
+    hdfsBuilder builder;
+    return hdfsBuilderConfGetStr(&builder, key, val);
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFS_C_API
+int hdfsConfGetInt(const char *key, int32_t *val)
+{
+  try
+  {
+    errno = 0;
+    hdfsBuilder builder;
+    return hdfsBuilderConfGetInt(&builder, key, val);
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+//
+//  Extended builder interface
+//
+struct hdfsBuilder *hdfsNewBuilderFromDirectory(const char * configDirectory)
+{
+  try
+  {
+    errno = 0;
+    return new struct hdfsBuilder(configDirectory);
+  } catch (const std::exception & e) {
+    ReportException(e);
+    return nullptr;
+  } catch (...) {
+    ReportCaughtNonException();
+    return nullptr;
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsBuilderConfGetStr(struct hdfsBuilder *bld, const char *key,
+                          char **val)
+{
+  try
+  {
+    errno = 0;
+    optional<std::string> value = bld->config.Get(key);
+    if (value)
+    {
+      size_t len = value->length() + 1;
+      *val = static_cast<char *>(malloc(len));
+      strncpy(*val, value->c_str(), len);
+    }
+    else
+    {
+      *val = nullptr;
+    }
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+// On a 32-bit platform we may be handed 64-bit values that don't fit in an
+//    int, and int is the type specified by the libhdfs hdfs.h interface
+bool isValidInt(int64_t value)
+{
+  return (value >= std::numeric_limits<int>::min() &&
+          value <= std::numeric_limits<int>::max());
+}
+
+LIBHDFSPP_EXT_API
+int hdfsBuilderConfGetInt(struct hdfsBuilder *bld, const char *key, int32_t *val)
+{
+  try
+  {
+    errno = 0;
+    // Pull from default configuration
+    optional<int64_t> value = bld->config.GetInt(key);
+    if (value)
+    {
+      if (!isValidInt(*value)){
+        ReportError(EINVAL, "Builder value is not valid");
+        return -1;
+      }
+      *val = *value;
+      return 0;
+    }
+    // If not found, don't change val
+    ReportError(EINVAL, "Could not get Builder value");
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+LIBHDFSPP_EXT_API
+int hdfsBuilderConfGetLong(struct hdfsBuilder *bld, const char *key, int64_t *val)
+{
+  try
+  {
+    errno = 0;
+    // Pull from default configuration
+    optional<int64_t> value = bld->config.GetInt(key);
+    if (value)
+    {
+      *val = *value;
+      return 0;
+    }
+    // If not found, don't change val
+    ReportError(EINVAL, "Could not get Builder value");
+    return 0;
+  } catch (const std::exception & e) {
+    return ReportException(e);
+  } catch (...) {
+    return ReportCaughtNonException();
+  }
+}
+
+/**
+ * Logging functions
+ **/
+class CForwardingLogger : public LoggerInterface {
+ public:
+  CForwardingLogger() : callback_(nullptr) {};
+
+  // Converts LogMessage into LogData, a POD type,
+  // and invokes callback_ if it's not null.
+  void Write(const LogMessage& msg);
+
+  // pass in NULL to clear the hook
+  void SetCallback(void (*callback)(LogData*));
+
+  //return a copy, or null on failure.
+  static LogData *CopyLogData(const LogData*);
+  //free LogData allocated with CopyLogData
+  static void FreeLogData(LogData*);
+ private:
+  void (*callback_)(LogData*);
+};
+
+/**
+ *  Plugin to forward message to a C function pointer
+ **/
+void CForwardingLogger::Write(const LogMessage& msg) {
+  if(!callback_)
+    return;
+
+  const std::string text = msg.MsgString();
+
+  LogData data;
+  data.level = msg.level();
+  data.component = msg.component();
+  data.msg = text.c_str();
+  data.file_name = msg.file_name();
+  data.file_line = msg.file_line();
+  callback_(&data);
+}
+
+void CForwardingLogger::SetCallback(void (*callback)(LogData*)) {
+  callback_ = callback;
+}
+
+LogData *CForwardingLogger::CopyLogData(const LogData *orig) {
+  if(!orig)
+    return nullptr;
+
+  LogData *copy = (LogData*)malloc(sizeof(LogData));
+  if(!copy)
+    return nullptr;
+
+  copy->level = orig->level;
+  copy->component = orig->component;
+  if(orig->msg)
+    copy->msg = strdup(orig->msg);
+  copy->file_name = orig->file_name;
+  copy->file_line = orig->file_line;
+  return copy;
+}
+
+void CForwardingLogger::FreeLogData(LogData *data) {
+  if(!data)
+    return;
+  if(data->msg)
+    free((void*)data->msg);
+
+  // Inexpensive way to help catch use-after-free
+  memset(data, 0, sizeof(LogData));
+  free(data);
+}
+
+LIBHDFSPP_EXT_API
+LogData *hdfsCopyLogData(LogData *data) {
+  return CForwardingLogger::CopyLogData(data);
+}
+
+LIBHDFSPP_EXT_API
+void hdfsFreeLogData(LogData *data) {
+  CForwardingLogger::FreeLogData(data);
+}
+
+LIBHDFSPP_EXT_API
+void hdfsSetLogFunction(void (*callback)(LogData*)) {
+  CForwardingLogger *logger = new CForwardingLogger();
+  logger->SetCallback(callback);
+  LogManager::SetLoggerImplementation(std::unique_ptr<LoggerInterface>(logger));
+}
+
+static bool IsLevelValid(int level) {
+  if(level < HDFSPP_LOG_LEVEL_TRACE || level > HDFSPP_LOG_LEVEL_ERROR)
+    return false;
+  return true;
+}
+
+
+// Could use __builtin_popcount as an optimization on some platforms
+static int popcnt(int val) {
+  int bits = sizeof(val) * 8;
+  int count = 0;
+  for(int i=0; i<bits; i++) {
+    if((val >> i) & 0x1)
+      count++;
+  }
+  return count;
+}
+
+static bool IsComponentValid(int component) {
+  if(component < HDFSPP_LOG_COMPONENT_UNKNOWN || component > HDFSPP_LOG_COMPONENT_FILESYSTEM)
+    return false;
+  if(popcnt(component) != 1)
+    return false;
+  return true;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsEnableLoggingForComponent(int component) {
+  errno = 0;
+  if(!IsComponentValid(component))
+    return -1;
+  LogManager::EnableLogForComponent(static_cast<LogSourceComponent>(component));
+  return 0;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsDisableLoggingForComponent(int component) {
+  errno = 0;
+  if(!IsComponentValid(component))
+    return -1;
+  LogManager::DisableLogForComponent(static_cast<LogSourceComponent>(component));
+  return 0;
+}
+
+LIBHDFSPP_EXT_API
+int hdfsSetLoggingLevel(int level) {
+  errno = 0;
+  if(!IsLevelValid(level))
+    return -1;
+  LogManager::SetLogLevel(static_cast<LogLevel>(level));
+  return 0;
+}
+
+#undef LIBHDFS_C_API
+#undef LIBHDFSPP_EXT_API
+
+
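
Editorial aside: a minimal end-to-end sketch of the builder and logging surface implemented above; it is not part of this patch. The host, port, and configuration key are illustrative, and the header locations are assumptions.

#include <cstdio>
// Assumed headers: hdfs.h (classic libhdfs API) and hdfspp/hdfs_ext.h (extensions).

// Sketch only: forward library log messages to stderr.
// Fields of LogData are the ones populated by CForwardingLogger::Write above.
static void log_to_stderr(LogData *data) {
  std::fprintf(stderr, "[libhdfs++] %s (%s:%d)\n",
               data->msg ? data->msg : "", data->file_name, (int)data->file_line);
}

int connect_example() {
  hdfsSetLogFunction(log_to_stderr);
  hdfsEnableLoggingForComponent(HDFSPP_LOG_COMPONENT_FILESYSTEM);
  hdfsSetLoggingLevel(HDFSPP_LOG_LEVEL_TRACE);

  struct hdfsBuilder *bld = hdfsNewBuilder();
  hdfsBuilderSetNameNode(bld, "localhost");      // illustrative NameNode host
  hdfsBuilderSetNameNodePort(bld, 8020);         // illustrative port
  hdfsBuilderConfSetStr(bld, "dfs.client.use.datanode.hostname", "true");

  hdfsFS fs = hdfsBuilderConnect(bld);           // always frees the builder
  if (!fs)
    return -1;
  int rc = hdfsExists(fs, "/");
  hdfsDisconnect(fs);
  return rc;
}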

+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/CMakeLists.txt

@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if(NEED_LINK_DL)
+   set(LIB_DL dl)
+endif()
+
+add_library(common_obj OBJECT status.cc sasl_digest_md5.cc hdfs_ioservice.cc options.cc configuration.cc configuration_loader.cc hdfs_configuration.cc uri.cc util.cc retry_policy.cc cancel_tracker.cc logging.cc libhdfs_events_impl.cc auth_info.cc namenode_info.cc statinfo.cc fsinfo.cc content_summary.cc locks.cc config_parser.cc)
+add_library(common $<TARGET_OBJECTS:common_obj> $<TARGET_OBJECTS:uriparser2_obj>)
+target_link_libraries(common ${LIB_DL})

+ 49 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/async_stream.h

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_COMMON_ASYNC_STREAM_H_
+#define LIB_COMMON_ASYNC_STREAM_H_
+
+#include <asio.hpp>
+
+namespace hdfs {
+
+typedef asio::mutable_buffers_1 MutableBuffers;
+typedef asio::const_buffers_1   ConstBuffers;
+
+/*
+ * asio-compatible stream implementation.
+ *
+ * Lifecycle: should be managed using std::shared_ptr so the object can be
+ *    handed from consumer to consumer
+ * Threading model: async_read_some and async_write_some are not thread-safe.
+ */
+class AsyncStream  {
+public:
+  virtual void async_read_some(const MutableBuffers &buf,
+          std::function<void (const asio::error_code & error,
+                                 std::size_t bytes_transferred) > handler) = 0;
+
+  virtual void async_write_some(const ConstBuffers &buf,
+            std::function<void (const asio::error_code & error,
+                                 std::size_t bytes_transferred) > handler) = 0;
+};
+
+}
+
+#endif
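
The comment block above defines the contract; as an illustration only (not part of this patch), a concrete AsyncStream could wrap an asio TCP socket directly, since the buffer and handler types line up with asio's own async_read_some/async_write_some. The class name and include path below are assumptions.

#include <asio.hpp>
#include "common/async_stream.h"   // this header; include path is an assumption

namespace hdfs {

// Sketch only: forward the AsyncStream virtuals to a TCP socket.
class TcpStream : public AsyncStream {
 public:
  explicit TcpStream(asio::io_service &io) : socket_(io) {}

  void async_read_some(const MutableBuffers &buf,
      std::function<void(const asio::error_code &, std::size_t)> handler) override {
    socket_.async_read_some(buf, handler);
  }

  void async_write_some(const ConstBuffers &buf,
      std::function<void(const asio::error_code &, std::size_t)> handler) override {
    socket_.async_write_some(buf, handler);
  }

 private:
  asio::ip::tcp::socket socket_;
};

}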

+ 18 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.cc

@@ -0,0 +1,18 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "auth_info.h"

+ 90 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/auth_info.h

@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_FS_AUTHINFO_H
+#define LIB_FS_AUTHINFO_H
+
+#include "common/optional_wrapper.h"
+
+namespace hdfs {
+
+class Token {
+public:
+  std::string identifier;
+  std::string password;
+};
+
+class AuthInfo {
+public:
+    enum AuthMethod {
+        kSimple,
+        kKerberos,
+        kToken,
+        kUnknownAuth,
+        kAuthFailed
+    };
+
+    AuthInfo() :
+        method(kSimple) {
+    }
+
+    explicit AuthInfo(AuthMethod mech) :
+        method(mech) {
+    }
+
+    bool useSASL() {
+        return method != kSimple;
+    }
+
+    const std::string & getUser() const {
+        return user;
+    }
+
+    void setUser(const std::string & user) {
+        this->user = user;
+    }
+
+    AuthMethod getMethod() const {
+        return method;
+    }
+
+    void setMethod(AuthMethod method) {
+        this->method = method;
+    }
+
+    const std::experimental::optional<Token> & getToken() const {
+        return token;
+    }
+
+    void setToken(const Token & token) {
+        this->token = token;
+    }
+
+    void clearToken() {
+        this->token = std::experimental::nullopt;
+    }
+
+private:
+    AuthMethod method;
+    std::string user;
+    std::experimental::optional<Token> token;
+};
+
+}
+
+#endif /* LIB_FS_AUTHINFO_H */

+ 37 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.cc

@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#include "cancel_tracker.h"
+
+namespace hdfs {
+
+CancelTracker::CancelTracker() : canceled_(false) {}
+
+std::shared_ptr<CancelTracker> CancelTracker::New() {
+  return std::make_shared<CancelTracker>();
+}
+
+bool CancelTracker::is_canceled() {
+  return canceled_;
+}
+
+void CancelTracker::set_canceled() {
+  canceled_ = true;
+}
+
+}

+ 40 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/cancel_tracker.h

@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ **/
+
+#ifndef COMMON_CANCELTRACKER_H
+#define COMMON_CANCELTRACKER_H
+
+#include <memory>
+#include <atomic>
+
+namespace hdfs {
+
+class CancelTracker : public std::enable_shared_from_this<CancelTracker> {
+ public:
+  CancelTracker();
+  static std::shared_ptr<CancelTracker> New();
+  void set_canceled();
+  bool is_canceled();
+ private:
+  std::atomic_bool canceled_;
+};
+
+typedef std::shared_ptr<CancelTracker> CancelHandle;
+
+}
+#endif

+ 219 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/config_parser.cc

@@ -0,0 +1,219 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/config_parser.h"
+#include "common/hdfs_configuration.h"
+#include "common/configuration_loader.h"
+
+#include <string>
+#include <memory>
+#include <vector>
+#include <numeric>
+
+namespace hdfs {
+
+static const char kSearchPathSeparator = ':';
+
+HdfsConfiguration LoadDefault(ConfigurationLoader & loader)
+{
+  optional<HdfsConfiguration> result = loader.LoadDefaultResources<HdfsConfiguration>();
+  if (result)
+  {
+    return result.value();
+  }
+  else
+  {
+    return loader.NewConfig<HdfsConfiguration>();
+  }
+}
+
+class ConfigParser::impl {
+ public:
+  impl() :
+      config_(loader_.NewConfig<HdfsConfiguration>()) {
+  }
+
+  impl(const std::vector<std::string>& dirs) :
+      config_(loader_.NewConfig<HdfsConfiguration>()) {
+
+      // Convert vector of paths into ':' separated path
+      std::string path = std::accumulate(dirs.begin(), dirs.end(), std::string(""),
+        [](std::string cumm, std::string elem) {return cumm + kSearchPathSeparator + elem;});
+      loader_.SetSearchPath(path);
+      config_ = LoadDefault(loader_);
+  }
+
+  impl(const std::string& path) :
+      config_(loader_.NewConfig<HdfsConfiguration>()) {
+
+      loader_.SetSearchPath(path);
+      config_ = LoadDefault(loader_);
+  }
+
+  bool LoadDefaultResources() {
+    config_ = LoadDefault(loader_);
+    return true;
+  }
+
+  std::vector<std::pair<std::string, Status> > ValidateResources() const {
+    return loader_.ValidateDefaultResources<HdfsConfiguration>();
+  }
+
+  bool get_int(const std::string& key, int& outval) const {
+    auto ret = config_.GetInt(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_string(const std::string& key, std::string& outval) const {
+    auto ret = config_.Get(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_bool(const std::string& key, bool& outval) const {
+    auto ret = config_.GetBool(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_double(const std::string& key, double& outval) const {
+    auto ret = config_.GetDouble(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_uri(const std::string& key, URI& outval) const {
+    auto ret = config_.GetUri(key);
+    if (!ret) {
+      return false;
+    } else {
+      outval = *ret;
+      return true;
+    }
+  }
+
+  bool get_options(Options& outval) {
+    outval = config_.GetOptions();
+    return true;
+  }
+
+ private:
+  ConfigurationLoader loader_;
+  HdfsConfiguration config_;
+};
+
+
+ConfigParser::ConfigParser() {
+  pImpl.reset(new ConfigParser::impl());
+}
+
+ConfigParser::ConfigParser(const std::vector<std::string>& configDirectories) {
+  pImpl.reset(new ConfigParser::impl(configDirectories));
+}
+
+ConfigParser::ConfigParser(const std::string& path) {
+  pImpl.reset(new ConfigParser::impl(path));
+}
+
+ConfigParser::~ConfigParser() = default;
+ConfigParser::ConfigParser(ConfigParser&&) = default;
+ConfigParser& ConfigParser::operator=(ConfigParser&&) = default;
+
+bool ConfigParser::LoadDefaultResources() { return pImpl->LoadDefaultResources(); }
+std::vector<std::pair<std::string, Status> > ConfigParser::ValidateResources() const { return pImpl->ValidateResources();}
+
+bool ConfigParser::get_int(const std::string& key, int& outval) const { return pImpl->get_int(key, outval); }
+int ConfigParser::get_int_or(const std::string& key, const int defaultval) const {
+  int res = 0;
+  if(get_int(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_string(const std::string& key, std::string& outval) const { return pImpl->get_string(key, outval); }
+std::string ConfigParser::get_string_or(const std::string& key, const std::string& defaultval) const {
+  std::string res;
+  if(get_string(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_bool(const std::string& key, bool& outval) const { return pImpl->get_bool(key, outval); }
+bool ConfigParser::get_bool_or(const std::string& key, const bool defaultval) const {
+  bool res = false;
+  if(get_bool(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_double(const std::string& key, double& outval) const { return pImpl->get_double(key, outval); }
+double ConfigParser::get_double_or(const std::string& key, const double defaultval) const {
+  double res = 0;
+  if(get_double(key, res)) {
+    return res;
+  } else {
+    return defaultval;
+  }
+}
+
+bool ConfigParser::get_uri(const std::string& key, URI& outval) const { return pImpl->get_uri(key, outval); }
+URI ConfigParser::get_uri_or(const std::string& key, const URI& defaultval) const {
+  URI res;
+  if(get_uri(key, res)) {
+    return res;
+  } else {
+    res = defaultval;
+    return res;
+  }
+}
+
+bool ConfigParser::get_options(Options& outval) const { return pImpl->get_options(outval); }
+Options ConfigParser::get_options_or(const Options& defaultval) const {
+  Options res;
+  if(get_options(res)) {
+    return res;
+  } else {
+    res = defaultval;
+    return res;
+  }
+}
+
+} // end namespace hdfs
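
Usage note (not part of this patch): the facade above is the public entry point for reading site configuration. A short sketch, with an illustrative search path and keys:

#include "hdfspp/config_parser.h"
#include <iostream>

int config_example() {
  hdfs::ConfigParser parser("/etc/hadoop/conf");   // illustrative search path

  // Surface any default resource that was found but failed XML validation.
  for (const auto &entry : parser.ValidateResources()) {
    if (!entry.second.ok())
      std::cerr << "bad config resource: " << entry.first << std::endl;
  }

  // The *_or getters fall back to the supplied default when a key is absent.
  std::string fs = parser.get_string_or("fs.defaultFS", "hdfs://localhost:8020");
  int retries    = parser.get_int_or("dfs.client.retry.max.attempts", 10);  // illustrative key
  std::cout << fs << " " << retries << std::endl;
  return 0;
}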

+ 169 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.cc

@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The following features are not currently implemented
+ * - Deprecated values
+ * - Make filename and config file contents unicode-safe
+ * - Config redirection/environment substitution
+ *
+ * - getInts (comma separated)
+ * - getStrings (comma separated)
+ * - getIntegerRange
+ * - getSocketAddr
+ * - getTimeDuration
+ * - getBytes (e.g. 1M or 1G)
+ * - hex values
+ */
+
+#include "configuration.h"
+#include "hdfspp/uri.h"
+
+#include <strings.h>
+#include <sstream>
+#include <map>
+#include <rapidxml/rapidxml.hpp>
+#include <rapidxml/rapidxml_utils.hpp>
+
+namespace hdfs {
+
+/*
+ * Configuration class
+ */
+std::vector<std::string> Configuration::GetDefaultFilenames() {
+  auto result = std::vector<std::string>();
+  result.push_back("core-site.xml");
+  return result;
+}
+
+
+optional<std::string> Configuration::Get(const std::string& key) const {
+  std::string caseFixedKey = fixCase(key);
+  auto found = raw_values_.find(caseFixedKey);
+  if (found != raw_values_.end()) {
+    return std::experimental::make_optional(found->second.value);
+  } else {
+    return optional<std::string>();
+  }
+}
+
+std::string Configuration::GetWithDefault(
+    const std::string& key, const std::string& default_value) const {
+  return Get(key).value_or(default_value);
+}
+
+optional<int64_t> Configuration::GetInt(const std::string& key) const {
+  auto raw = Get(key);
+  if (raw) {
+    errno = 0;
+    char* end = nullptr;
+    optional<int64_t> result =
+        std::experimental::make_optional(static_cast<int64_t>(strtoll(raw->c_str(), &end, 10)));
+    if (end == raw->c_str()) {
+      /* strtoll will set end to input if no conversion was done */
+      return optional<int64_t>();
+    }
+    if (errno == ERANGE) {
+      return optional<int64_t>();
+    }
+
+    return result;
+  } else {
+    return optional<int64_t>();
+  }
+}
+
+int64_t Configuration::GetIntWithDefault(const std::string& key,
+                                         int64_t default_value) const {
+  return GetInt(key).value_or(default_value);
+}
+
+optional<double> Configuration::GetDouble(const std::string& key) const {
+  auto raw = Get(key);
+  if (raw) {
+    errno = 0;
+    char* end = nullptr;
+    auto result = std::experimental::make_optional(strtod(raw->c_str(), &end));
+    if (end == raw->c_str()) {
+      /* strtod will set end to input if no conversion was done */
+      return optional<double>();
+    }
+    if (errno == ERANGE) {
+      return optional<double>();
+    }
+
+    return result;
+  } else {
+    return optional<double>();
+  }
+}
+
+double Configuration::GetDoubleWithDefault(const std::string& key,
+                                           double default_value) const {
+  return GetDouble(key).value_or(default_value);
+}
+
+optional<bool> Configuration::GetBool(const std::string& key) const {
+  auto raw = Get(key);
+  if (!raw) {
+    return optional<bool>();
+  }
+
+  if (!strcasecmp(raw->c_str(), "true")) {
+    return std::experimental::make_optional(true);
+  }
+  if (!strcasecmp(raw->c_str(), "false")) {
+    return std::experimental::make_optional(false);
+  }
+
+  return optional<bool>();
+}
+
+bool Configuration::GetBoolWithDefault(const std::string& key,
+                                       bool default_value) const {
+  return GetBool(key).value_or(default_value);
+}
+
+optional<URI> Configuration::GetUri(const std::string& key) const {
+  optional<std::string> raw = Get(key);
+  if (raw) {
+    try {
+      return std::experimental::make_optional(URI::parse_from_string(*raw));
+    } catch (const uri_parse_error& e) {
+      // Return empty below
+    }
+  }
+  return optional<URI>();
+}
+
+URI Configuration::GetUriWithDefault(const std::string& key,
+                                     std::string default_value) const {
+  optional<URI> result = GetUri(key);
+  if (result) {
+    return *result;
+  } else {
+    try {
+      return URI::parse_from_string(default_value);
+    } catch (const uri_parse_error& e) {
+      return URI();
+    }
+  }
+}
+
+
+}

+ 108 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration.h

@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_CONFIGURATION_H_
+#define COMMON_CONFIGURATION_H_
+
+#include "hdfspp/uri.h"
+
+#include <string>
+#include <map>
+#include <vector>
+#include <set>
+#include <istream>
+#include <stdint.h>
+#include "common/optional_wrapper.h"
+
+namespace hdfs {
+
+template <class T>
+using optional = std::experimental::optional<T>;
+
+/**
+ * Configuration class that parses XML.
+ *
+ * Files should be an XML file of the form
+ * <configuration>
+ *  <property>
+ *    <name>Name</name>
+ *    <value>Value</value>
+ *  </property>
+ * </configuration>
+ *
+ * Configuration objects should be created via the ConfigurationLoader class.
+ * Configuration objects are immutable and can be shared between threads.
+ *
+ * This class is thread-safe.
+ */
+class Configuration {
+ public:
+  // Gets values
+  std::string           GetWithDefault(const std::string &key,
+                                       const std::string &default_value) const;
+  optional<std::string> Get(const std::string &key) const;
+  int64_t               GetIntWithDefault(const std::string &key,
+                                          int64_t default_value) const;
+  optional<int64_t>     GetInt(const std::string &key) const;
+  double                GetDoubleWithDefault(const std::string &key,
+                                             double default_value) const;
+  optional<double>      GetDouble(const std::string &key) const;
+  bool                  GetBoolWithDefault(const std::string &key,
+                                           bool default_value) const;
+  optional<bool>        GetBool(const std::string &key) const;
+  URI                   GetUriWithDefault(const std::string &key,
+                                          std::string default_value) const;
+  optional<URI>         GetUri(const std::string &key) const;
+
+protected:
+   friend class ConfigurationLoader;
+
+  /* Transparent data holder for property values */
+  struct ConfigData {
+    std::string value;
+    bool final;
+    ConfigData() : final(false){}
+    ConfigData(const std::string &value_) : value(value_), final(false) {}
+    void operator=(const std::string &new_value) {
+      value = new_value;
+      final = false;
+    }
+  };
+  typedef std::map<std::string, ConfigData> ConfigMap;
+
+  Configuration() {}
+  Configuration(ConfigMap &src_map) : raw_values_(src_map){}
+  Configuration(const ConfigMap &src_map) : raw_values_(src_map){}
+
+  static std::vector<std::string> GetDefaultFilenames();
+
+  // While we want this to be const, it would preclude copying Configuration
+  //    objects.  The Configuration class must not allow any mutations of
+  //    the raw_values
+  ConfigMap raw_values_;
+
+  static std::string fixCase(const std::string &in) {
+    std::string result(in);
+    for (auto & c: result) c = static_cast<char>(toupper(c));
+    return result;
+  }
+};
+
+}
+
+#endif
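
Tying the class comment above to the loader used in hdfs.cc, a brief sketch (not part of this patch) of producing an immutable Configuration and reading a value; the key is illustrative, and LoadDefaultResources/NewConfig are the same loader templates invoked elsewhere in this patch.

#include "common/configuration_loader.h"

// Sketch only: load core-site.xml from the default search path.
hdfs::optional<std::string> read_default_fs() {
  hdfs::ConfigurationLoader loader;                  // default search path
  hdfs::optional<hdfs::Configuration> conf =
      loader.LoadDefaultResources<hdfs::Configuration>();
  if (!conf) {
    conf = loader.NewConfig<hdfs::Configuration>();  // empty but still usable
  }
  // Getters never mutate the object, so it can be shared across threads.
  return conf->Get("fs.defaultFS");
}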

+ 328 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.cc

@@ -0,0 +1,328 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration_loader.h"
+#include "common/logging.h"
+
+#include <fstream>
+#include <strings.h>
+#include <sstream>
+#include <map>
+#include <sys/stat.h>
+#include <rapidxml/rapidxml.hpp>
+#include <rapidxml/rapidxml_utils.hpp>
+
+namespace hdfs {
+
+/*
+ * ConfigurationLoader class
+ */
+
+#if defined(WIN32) || defined(_WIN32)
+static const char kFileSeparator = '\\';
+#else
+static const char kFileSeparator = '/';
+#endif
+
+static const char kSearchPathSeparator = ':';
+
+bool is_valid_bool(const std::string& raw) {
+  if (raw.empty()) {
+    return false;
+  }
+
+  if (!strcasecmp(raw.c_str(), "true")) {
+    return true;
+  }
+  if (!strcasecmp(raw.c_str(), "false")) {
+    return true;
+  }
+  return false;
+}
+
+bool str_to_bool(const std::string& raw) {
+  if (!strcasecmp(raw.c_str(), "true")) {
+    return true;
+  }
+
+  return false;
+}
+
+ConfigurationLoader::ConfigurationLoader() {
+  // To create a configuration loader with the default search path
+  // ("$HADOOP_CONF_DIR" or "/etc/hadoop/conf") we call SetDefaultSearchPath().
+  ConfigurationLoader::SetDefaultSearchPath();
+}
+
+void ConfigurationLoader::SetDefaultSearchPath() {
+  // Try (in order, taking the first valid one):
+  //    $HADOOP_CONF_DIR
+  //    /etc/hadoop/conf
+  const char * hadoop_conf_dir_env = getenv("HADOOP_CONF_DIR");
+  if (hadoop_conf_dir_env) {
+    std::stringstream ss(hadoop_conf_dir_env);
+    std::string path;
+    while (std::getline(ss, path, kSearchPathSeparator)) {
+      AddToSearchPath(path);
+    }
+  } else {
+    AddToSearchPath("/etc/hadoop/conf");
+  }
+}
+
+void ConfigurationLoader::ClearSearchPath()
+{
+  search_path_.clear();
+}
+
+void ConfigurationLoader::SetSearchPath(const std::string & searchPath)
+{
+  search_path_.clear();
+
+  std::vector<std::string> paths;
+  std::string::size_type start = 0;
+  std::string::size_type end = searchPath.find(kSearchPathSeparator);
+
+  while (end != std::string::npos) {
+     paths.push_back(searchPath.substr(start, end-start));
+     start = ++end;
+     end = searchPath.find(kSearchPathSeparator, start);
+  }
+  paths.push_back(searchPath.substr(start, searchPath.length()));
+
+  for (auto path: paths) {
+    AddToSearchPath(path);
+  }
+
+}
+
+void ConfigurationLoader::AddToSearchPath(const std::string & searchPath)
+{
+  if (searchPath.empty())
+    return;
+
+  if (searchPath.back() != kFileSeparator) {
+    std::string pathWithSlash(searchPath);
+    pathWithSlash += kFileSeparator;
+    search_path_.push_back(pathWithSlash);
+  } else {
+    search_path_.push_back(searchPath);
+  }
+}
+
+std::string ConfigurationLoader::GetSearchPath()
+{
+  std::stringstream result;
+  bool first = true;
+  for(std::string item: search_path_) {
+    if (!first) {
+      result << kSearchPathSeparator;
+    }
+
+    result << item;
+    first = false;
+  }
+
+  return result.str();
+}
+
+Status validateStream(std::istream & stream) {
+  std::streampos start = stream.tellg();
+  stream.seekg(0, std::ios::end);
+  std::streampos end = stream.tellg();
+  stream.seekg(start, std::ios::beg);
+
+  int length = end - start;
+
+  if (length <= 0 || start == -1 || end == -1)
+    return Status::Error("The configuration file is empty");
+
+  LOG_DEBUG(kFileSystem, << "validateStream will read a config file of length " << length);
+
+  std::vector<char> raw_bytes((int64_t)length + 1);
+  stream.read(&raw_bytes[0], length);
+  raw_bytes[length] = 0;
+
+  try {
+    rapidxml::xml_document<> dom;
+    dom.parse<rapidxml::parse_trim_whitespace|rapidxml::parse_validate_closing_tags>(&raw_bytes[0]);
+
+    /* File must contain a single <configuration> stanza */
+    auto config_node = dom.first_node("configuration", 0, false);
+    if (!config_node) {
+      return Status::Error("The configuration file is missing a 'configuration' tag");
+    }
+    return Status::OK();
+  } catch (const rapidxml::parse_error &e) {
+    size_t location = e.where<char>() - &raw_bytes[0];
+    std::string msg = "The configuration file has invalid xml around character " + std::to_string(location);
+    return Status::Error(msg.c_str());
+  }
+}
+
+std::vector<std::pair<std::string, Status> > ConfigurationLoader::ValidateResources(std::vector<std::string> filenames) const
+{
+  std::vector<std::pair<std::string, Status> > stats;
+  bool found;
+  for(auto file: filenames) {
+    found = false;
+    for(auto dir: search_path_) {
+      std::ifstream stream(dir + file);
+      if ( stream.is_open() ) {
+        found = true;
+        stats.push_back(std::make_pair(file,validateStream(stream)));
+      } else {
+        LOG_DEBUG(kFileSystem, << dir << file << " was not found");
+      }
+    }
+    if(!found) {
+      std::string msg("No directory in the current search path contains the file [" + file + "]");
+      stats.push_back(std::make_pair(file,Status::PathNotFound(msg.c_str())));
+    }
+  }
+  return stats;
+}
+
+bool ConfigurationLoader::UpdateMapWithFile(ConfigMap & map, const std::string & path) const
+{
+  if (path.front() == kFileSeparator) { // Absolute path
+    std::ifstream stream(path, std::ifstream::in);
+    if ( stream.is_open() ) {
+      return UpdateMapWithStream(map, stream);
+    } else {
+      return false;
+    }
+  } else { // Use search path
+    for(auto dir: search_path_) {
+      std::ifstream stream(dir + path);
+      if ( stream.is_open() ) {
+        if (UpdateMapWithStream(map, stream))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool ConfigurationLoader::UpdateMapWithStream(ConfigMap & map,
+                                              std::istream & stream) {
+  std::streampos start = stream.tellg();
+  stream.seekg(0, std::ios::end);
+  std::streampos end = stream.tellg();
+  stream.seekg(start, std::ios::beg);
+
+  int length = end - start;
+
+  if (length <= 0 || start == -1 || end == -1)
+    return false;
+
+  std::vector<char> raw_bytes((int64_t)length + 1);
+  stream.read(&raw_bytes[0], length);
+  raw_bytes[length] = 0;
+
+  return UpdateMapWithBytes(map, raw_bytes);
+}
+
+bool ConfigurationLoader::UpdateMapWithString(ConfigMap & map,
+                                                   const std::string &xml_data) {
+  if (xml_data.size() == 0) {
+    return false;
+  }
+
+  std::vector<char> raw_bytes(xml_data.begin(), xml_data.end());
+  raw_bytes.push_back('\0');
+
+  return UpdateMapWithBytes(map, raw_bytes);
+}
+
+bool ConfigurationLoader::UpdateMapWithBytes(ConfigMap& map,
+                                                 std::vector<char>& raw_bytes) {
+  try {
+    rapidxml::xml_document<> dom;
+    dom.parse<rapidxml::parse_trim_whitespace>(&raw_bytes[0]);
+
+    /* File must contain a single <configuration> stanza */
+    auto config_node = dom.first_node("configuration", 0, false);
+    if (!config_node) {
+      return false;
+    }
+
+    /* Walk all of the <property> nodes, ignoring the rest */
+    for (auto property_node = config_node->first_node("property", 0, false);
+         property_node;
+         property_node = property_node->next_sibling("property", 0, false)) {
+      auto name_node = property_node->first_node("name", 0, false);
+      auto value_node = property_node->first_node("value", 0, false);
+
+      if (name_node && value_node) {
+        std::string final_value;
+        auto final_node = property_node->first_node("final", 0, false);
+        if (final_node) {
+          final_value = final_node->value();
+        }
+        UpdateMapWithValue(map, name_node->value(), value_node->value(), final_value);
+      }
+
+      auto name_attr = property_node->first_attribute("name", 0, false);
+      auto value_attr = property_node->first_attribute("value", 0, false);
+
+      if (name_attr && value_attr) {
+        std::string final_value;
+        auto final_attr = property_node->first_attribute("final", 0, false);
+        if (final_attr) {
+          final_value = final_attr->value();
+        }
+        UpdateMapWithValue(map, name_attr->value(), value_attr->value(), final_value);
+      }
+    }
+
+    return true;
+  } catch (const rapidxml::parse_error &e) {
+    // TODO: Capture the result in a Status object
+    return false;
+  }
+}
+
+bool ConfigurationLoader::UpdateMapWithValue(ConfigMap& map,
+                                             const std::string& key, const std::string& value,
+                                             const std::string& final_text)
+{
+  std::string caseFixedKey = Configuration::fixCase(key);
+  auto mapValue = map.find(caseFixedKey);
+  if (mapValue != map.end() && mapValue->second.final) {
+    return false;
+  }
+
+  bool final_value = false;
+  if (is_valid_bool(final_text)) {
+    final_value = str_to_bool(final_text);
+  }
+
+  map[caseFixedKey].value = value;
+  map[caseFixedKey].final = final_value;
+  return true;
+}
+
+}
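
As a reading aid for the loader above, here is a minimal sketch of how the "final" handling in UpdateMapWithValue() plays out end to end. The function name, XML literals, and property values below are illustrative only and are not part of this patch:

#include "common/configuration_loader.h"

using namespace hdfs;

// Demonstrates that a value marked <final>true</final> in the base resource
// survives a later overlay, because UpdateMapWithValue() refuses to replace it.
void final_override_sketch() {
  const std::string base =
      "<configuration>"
      "  <property><name>dfs.blocksize</name><value>67108864</value><final>true</final></property>"
      "</configuration>";
  const std::string overlay =
      "<configuration>"
      "  <property><name>dfs.blocksize</name><value>134217728</value></property>"
      "</configuration>";

  ConfigurationLoader loader;
  auto base_conf = loader.Load<Configuration>(base);
  if (base_conf) {
    auto merged = loader.OverlayResourceString(*base_conf, overlay);
    // merged (if engaged) still holds 67108864 for dfs.blocksize: the base
    // value was final, so the overlay's value was silently ignored.
    (void)merged;
  }
}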

+ 138 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader.h

@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_CONFIGURATION_BUILDER_H_
+#define COMMON_CONFIGURATION_BUILDER_H_
+
+#include "configuration.h"
+#include "hdfspp/status.h"
+
+namespace hdfs {
+
+
+class ConfigurationLoader {
+public:
+  // Creates a new, empty Configuration object
+  // T must be Configuration or a subclass
+  template<class T>
+  T NewConfig();
+
+  /****************************************************************************
+   *                    LOADING CONFIG FILES
+   ***************************************************************************/
+
+  // Loads Configuration XML contained in a string/stream/file and returns a parsed
+  //    Configuration object.
+  //    T must be Configuration or a subclass
+  template<class T>
+  optional<T> Load(const std::string &xml_data);
+  // Streams must be seekable
+  template<class T>
+  optional<T> LoadFromStream(std::istream & stream);
+  // The ConfigurationLoader's search path will be searched for the filename
+  //    unless it is an absolute path
+  template<class T>
+  optional<T> LoadFromFile(const std::string &filename);
+
+  // Loads Configuration XML contained in a string and produces a new copy that
+  //    is the union of the src and xml_data
+  //    Any parameters from src will be overwritten by the xml_data unless they
+  //    are marked as "final" in src.
+  //    T must be Configuration or a subclass
+  template<class T>
+  optional<T> OverlayResourceString(const T &src, const std::string &xml_data) const;
+  // Streams must be seekable
+  template<class T>
+  optional<T> OverlayResourceStream(const T &src, std::istream &stream) const;
+  //    The ConfigurationLoader's search path will be searched for the filename
+  //       unless it is an absolute path
+  template<class T>
+  optional<T> OverlayResourceFile(const T &src, const std::string &path) const;
+
+  // Attempts to update the map.  If the update failed (because there was
+  // an existing final value, for example), returns the original map
+  template<class T>
+  optional<T> OverlayValue(const T &src, const std::string &key, const std::string &value) const;
+
+  // Returns an instance of the Configuration with all of the default resource
+  //    files loaded.
+  //    T must be Configuration or a subclass
+  template<class T>
+  optional<T> LoadDefaultResources();
+
+
+  // Returns a vector of filenames and the corresponding status when validation is attempted.
+  //    If the files can be successfully validated, then the status returned for that file is Status::OK
+  //    The files that are validated are those returned by T::GetDefaultFilenames().
+  //    T must be Configuration or a subclass
+  template<class T>
+  std::vector<std::pair<std::string, Status>> ValidateDefaultResources() const;
+
+  /****************************************************************************
+   *                    SEARCH PATH METHODS
+   ***************************************************************************/
+
+  //Creates a configuration loader with the default search path ("$HADOOP_CONF_DIR" or "/etc/hadoop/conf").
+  //If you want to explicitly set the entire search path, call ClearSearchPath() first
+  ConfigurationLoader();
+
+  // Sets the search path to the default search path (namely, "$HADOOP_CONF_DIR" or "/etc/hadoop/conf")
+  void SetDefaultSearchPath();
+
+  // Clears out the search path
+  void ClearSearchPath();
+  // Sets the search path to ":"-delimited paths
+  void SetSearchPath(const std::string & searchPath);
+  // Adds an element to the search path
+  void AddToSearchPath(const std::string & searchPath);
+  // Returns the search path in ":"-delimited form
+  std::string GetSearchPath();
+
+protected:
+  using ConfigMap = Configuration::ConfigMap;
+
+  std::vector<std::pair<std::string, Status>> ValidateResources(std::vector<std::string> filenames) const;
+
+  // Updates the src map with data from the XML in the path
+  //   The search path will be searched for the filename
+  bool UpdateMapWithFile(ConfigMap & map, const std::string & path) const;
+
+  // Updates the src map with data from the XML in the stream
+  //   The stream must be seekable
+  static bool UpdateMapWithStream(ConfigMap & map,
+                                  std::istream & stream);
+  // Updates the src map with data from the XML
+  static bool UpdateMapWithString(Configuration::ConfigMap & src,
+                                  const std::string &xml_data);
+  // Updates the src map with data from the XML
+  static bool UpdateMapWithBytes(Configuration::ConfigMap &map,
+                                 std::vector<char> &raw_bytes);
+
+  // Attempts to update the map.  If the update failed (because there was
+  // an existing final value, for example), returns false
+  static bool UpdateMapWithValue(ConfigMap& map,
+        const std::string& key, const std::string& value, const std::string& final_text);
+
+  std::vector<std::string> search_path_;
+};
+
+}
+
+#include "configuration_loader_impl.h"
+
+#endif
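
To make the loading and search-path comments above concrete, a minimal usage sketch follows. The search path, XML literal, and host name are made up for illustration; only the ConfigurationLoader and Configuration calls shown in this patch are used:

#include "common/configuration_loader.h"

using namespace hdfs;

// Loads a small in-memory resource and reads a property back as a URI.
void loader_usage_sketch() {
  ConfigurationLoader loader;                       // default search path: $HADOOP_CONF_DIR or /etc/hadoop/conf
  loader.SetSearchPath("/etc/hadoop/conf:/opt/hadoop/conf");  // ":"-delimited, per SetSearchPath()

  const std::string xml =
      "<configuration>"
      "  <property><name>fs.defaultFS</name><value>hdfs://namenode.example.com:8020</value></property>"
      "</configuration>";

  auto conf = loader.Load<Configuration>(xml);
  if (conf) {
    auto default_fs = conf->GetUri("fs.defaultFS");  // optional<URI>
    (void)default_fs;
  }
  // LoadDefaultResources<HdfsConfiguration>() would instead pull core-site.xml
  // and hdfs-site.xml from the search path.
}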

+ 122 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/configuration_loader_impl.h

@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_CONFIGURATION_BUILDER_IMPL_H_
+#define COMMON_CONFIGURATION_BUILDER_IMPL_H_
+
+namespace hdfs {
+
+
+template<class T>
+T ConfigurationLoader::NewConfig() {
+  return T();
+}
+
+template<class T>
+optional<T> ConfigurationLoader::Load(const std::string &xml_data) {
+  return OverlayResourceString<T>(T(), xml_data);
+}
+template<class T>
+optional<T> ConfigurationLoader::LoadFromStream(std::istream &stream) {
+  return OverlayResourceStream<T>(T(), stream);
+}
+template<class T>
+optional<T> ConfigurationLoader::LoadFromFile(const std::string &path) {
+  return OverlayResourceFile<T>(T(), path);
+}
+
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayResourceFile(const T& src, const std::string &path) const {
+  ConfigMap map(src.raw_values_);
+  bool success = UpdateMapWithFile(map, path);
+
+  if (success) {
+    return std::experimental::make_optional<T>(map);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayResourceStream(const T& src, std::istream & stream) const {
+  ConfigMap map(src.raw_values_);
+  bool success = UpdateMapWithStream(map, stream);
+
+  if (success) {
+    return std::experimental::make_optional<T>(map);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayResourceString(const T& src, const std::string &xml_data) const {
+  if (xml_data.size() == 0) {
+    return optional<T>();
+  }
+
+  std::vector<char> raw_bytes(xml_data.begin(), xml_data.end());
+  raw_bytes.push_back('\0');
+
+  ConfigMap map(src.raw_values_);
+  bool success = UpdateMapWithBytes(map, raw_bytes);
+
+  if (success) {
+    return std::experimental::make_optional<T>(map);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+optional<T> ConfigurationLoader::OverlayValue(const T& src, const std::string &key, const std::string &value) const {
+  ConfigMap map(src.raw_values_);
+  UpdateMapWithValue(map, key, value, "");
+
+  return std::experimental::make_optional<T>(map);
+}
+
+template <class T>
+optional<T> ConfigurationLoader::LoadDefaultResources() {
+  std::vector<std::string> default_filenames = T::GetDefaultFilenames();
+
+  ConfigMap result;
+  bool success = false;
+
+  for (auto fn: default_filenames) {
+    // We succeed if we have loaded data from any file
+    success |= UpdateMapWithFile(result, fn);
+  }
+
+  if (success) {
+    return std::experimental::make_optional<T>(result);
+  } else {
+    return optional<T>();
+  }
+}
+
+template<class T>
+std::vector<std::pair<std::string, Status> > ConfigurationLoader::ValidateDefaultResources() const{
+  return ValidateResources(T::GetDefaultFilenames());
+}
+
+
+}
+
+#endif

+ 55 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/content_summary.cc

@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hdfspp/content_summary.h>
+#include <sstream>
+#include <iomanip>
+
+namespace hdfs {
+
+ContentSummary::ContentSummary()
+: length(0),
+  filecount(0),
+  directorycount(0),
+  quota(0),
+  spaceconsumed(0),
+  spacequota(0) {
+}
+
+std::string ContentSummary::str(bool include_quota) const {
+  std::stringstream ss;
+  if(include_quota){
+    ss  << this->quota << " "
+        << spacequota << " "
+        << spaceconsumed << " ";
+  }
+  ss  << directorycount << " "
+      << filecount << " "
+      << length << " "
+      << path;
+  return ss.str();
+}
+
+std::string ContentSummary::str_du() const {
+  std::stringstream ss;
+  ss  << std::left << std::setw(10) << length
+      << path;
+  return ss.str();
+}
+
+}

+ 65 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/asio.h

@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_CONTINUATION_ASIO_H_
+#define LIB_COMMON_CONTINUATION_ASIO_H_
+
+#include "continuation.h"
+#include "common/util.h"
+
+#include "hdfspp/status.h"
+
+#include <asio/connect.hpp>
+#include <asio/read.hpp>
+#include <asio/write.hpp>
+#include <asio/ip/tcp.hpp>
+#include <memory>
+
+namespace hdfs {
+namespace asio_continuation {
+
+using namespace continuation;
+
+template <class Stream, class ConstBufferSequence>
+class WriteContinuation : public Continuation {
+public:
+  WriteContinuation(std::shared_ptr<Stream>& stream, const ConstBufferSequence &buffer)
+      : stream_(stream), buffer_(buffer) {}
+
+  virtual void Run(const Next &next) override {
+    auto handler =
+        [next](const asio::error_code &ec, size_t) { next(ToStatus(ec)); };
+    asio::async_write(*stream_, buffer_, handler);
+  }
+
+private:
+  // prevent construction from raw ptr
+  WriteContinuation(Stream *stream, ConstBufferSequence &buffer);
+  std::shared_ptr<Stream> stream_;
+  ConstBufferSequence buffer_;
+};
+
+template <class Stream, class ConstBufferSequence>
+static inline Continuation *Write(std::shared_ptr<Stream> stream,
+                                  const ConstBufferSequence &buffer) {
+  return new WriteContinuation<Stream, ConstBufferSequence>(stream, buffer);
+}
+
+}
+}
+
+#endif

+ 137 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/continuation.h

@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_CONTINUATION_CONTINUATION_H_
+#define LIB_COMMON_CONTINUATION_CONTINUATION_H_
+
+#include "hdfspp/status.h"
+#include "common/cancel_tracker.h"
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+namespace hdfs {
+namespace continuation {
+
+class PipelineBase;
+
+/**
+ * A continuation is a fragment of runnable code whose execution will
+ * be scheduled by a \link Pipeline \endlink.
+ *
+ * The Continuation class is a building block for implementing
+ * Continuation Passing Style (CPS) in libhdfs++. In CPS, the
+ * upper-level user specifies the control flow by chaining a sequence
+ * of continuations explicitly through the \link Run() \endlink method,
+ * while in traditional imperative programming the sequence of
+ * statements implicitly specifies the control flow.
+ *
+ * See http://en.wikipedia.org/wiki/Continuation for more details.
+ **/
+class Continuation {
+public:
+  typedef std::function<void(const Status &)> Next;
+  virtual ~Continuation() = default;
+  virtual void Run(const Next &next) = 0;
+  Continuation(const Continuation &) = delete;
+  Continuation &operator=(const Continuation &) = delete;
+
+protected:
+  Continuation() = default;
+};
+
+/**
+ * A pipeline schedules the execution of a chain of \link Continuation
+ * \endlink. The pipeline schedules the execution of continuations
+ * based on their order in the pipeline, where the next parameter for
+ * each continuation points to the \link Schedule() \endlink
+ * method. That way the pipeline executes all scheduled continuations
+ * in sequence.
+ *
+ * The typical use case of a pipeline is executing continuations
+ * asynchronously. Note that a continuation calls the next
+ * continuation when it is finished. If the continuation is posted
+ * into an asynchronous event loop, invoking the next continuation
+ * can be done in the callback handler in the asynchronous event loop.
+ *
+ * The pipeline allocates memory as follows. A pipeline is always
+ * allocated on the heap. It owns all of the continuations as well as
+ * the state specified by the user. Both the continuations and the
+ * state have the same life cycle as the pipeline. This design
+ * simplifies the problem of ensuring that the executions in the
+ * asynchronous event loop always hold valid pointers w.r.t. the
+ * pipeline. The pipeline will automatically deallocate itself right
+ * after it invokes the callback specified by the user.
+ **/
+template <class State> class Pipeline {
+public:
+  typedef std::function<void(const Status &, const State &)> UserHandler;
+  static Pipeline *Create() { return new Pipeline(); }
+  static Pipeline *Create(CancelHandle cancel_handle) {
+    return new Pipeline(cancel_handle);
+  }
+  Pipeline &Push(Continuation *stage);
+  void Run(UserHandler &&handler);
+  State &state() { return state_; }
+
+private:
+  State state_;
+  std::vector<std::unique_ptr<Continuation>> routines_;
+  size_t stage_;
+  std::function<void(const Status &, const State &)> handler_;
+
+  Pipeline() : stage_(0), cancel_handle_(CancelTracker::New()) {}
+  Pipeline(CancelHandle cancel_handle) : stage_(0), cancel_handle_(cancel_handle) {}
+  ~Pipeline() = default;
+  void Schedule(const Status &status);
+  CancelHandle cancel_handle_;
+};
+
+template <class State>
+inline Pipeline<State> &Pipeline<State>::Push(Continuation *stage) {
+  routines_.emplace_back(std::unique_ptr<Continuation>(stage));
+  return *this;
+}
+
+template <class State>
+inline void Pipeline<State>::Schedule(const Status &status) {
+  // catch cancelation signalled from outside of pipeline
+  if(cancel_handle_->is_canceled()) {
+    handler_(Status::Canceled(), state_);
+    routines_.clear();
+    delete this;
+  } else if (!status.ok() || stage_ >= routines_.size()) {
+    handler_(status, state_);
+    routines_.clear();
+    delete this;
+  } else {
+    auto next = routines_[stage_].get();
+    ++stage_;
+    next->Run(std::bind(&Pipeline::Schedule, this, std::placeholders::_1));
+  }
+}
+
+template <class State> inline void Pipeline<State>::Run(UserHandler &&handler) {
+  handler_ = std::move(handler);
+  Schedule(Status::OK());
+}
+
+}
+}
+
+#endif
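
The doc comments above describe the CPS contract in the abstract; the following sketch shows the mechanics with a trivial stage. The stage and state names are invented for illustration and are not part of the patch:

#include "common/continuation/continuation.h"

using namespace hdfs;
using namespace hdfs::continuation;

// A trivial stage that reports success and hands control to the next stage.
struct NoOpStage : Continuation {
  void Run(const Next &next) override { next(Status::OK()); }
};

struct MyState { int attempts = 0; };

void pipeline_sketch() {
  auto pipeline = Pipeline<MyState>::Create();   // heap-allocated; frees itself after the callback
  pipeline->state().attempts = 1;
  pipeline->Push(new NoOpStage())                // the pipeline takes ownership of each stage
          .Push(new NoOpStage());
  pipeline->Run([](const Status &s, const MyState &state) {
    // Invoked once all stages have run (or one of them failed); the pipeline
    // deletes itself right after this handler returns.
    (void)s; (void)state;
  });
}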

+ 129 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/continuation/protobuf.h

@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIBHDFSPP_COMMON_CONTINUATION_PROTOBUF_H_
+#define LIBHDFSPP_COMMON_CONTINUATION_PROTOBUF_H_
+
+#include "common/util.h"
+
+#include <asio/read.hpp>
+
+#include <google/protobuf/message_lite.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+#include <cassert>
+
+namespace hdfs {
+namespace continuation {
+
+template <class Stream, size_t MaxMessageSize = 512>
+struct ReadDelimitedPBMessageContinuation : public Continuation {
+  ReadDelimitedPBMessageContinuation(std::shared_ptr<Stream> stream,
+                                     ::google::protobuf::MessageLite *msg)
+      : stream_(stream), msg_(msg) {}
+
+  virtual void Run(const Next &next) override {
+    namespace pbio = google::protobuf::io;
+    auto handler = [this, next](const asio::error_code &ec, size_t) {
+      Status status;
+      if (ec) {
+        status = ToStatus(ec);
+      } else {
+        pbio::ArrayInputStream as(&buf_[0], buf_.size());
+        pbio::CodedInputStream is(&as);
+        uint32_t size = 0;
+        bool v = is.ReadVarint32(&size);
+        assert(v);
+        (void)v; //avoids unused variable warning
+        is.PushLimit(size);
+        msg_->Clear();
+        v = msg_->MergeFromCodedStream(&is);
+        assert(v);
+      }
+      next(status);
+    };
+    asio::async_read(*stream_,
+        asio::buffer(buf_),
+        std::bind(&ReadDelimitedPBMessageContinuation::CompletionHandler, this,
+                  std::placeholders::_1, std::placeholders::_2),
+        handler);
+  }
+
+private:
+  size_t CompletionHandler(const asio::error_code &ec, size_t transferred) {
+    if (ec) {
+      return 0;
+    }
+
+    size_t offset = 0, len = 0;
+    for (size_t i = 0; i + 1 < transferred && i < sizeof(int32_t); ++i) {
+      len = (len << 7) | (buf_[i] & 0x7f);
+      if ((uint8_t)buf_.at(i) < 0x80) {
+        offset = i + 1;
+        break;
+      }
+    }
+
+    assert(offset + len < buf_.size() && "Message is too big");
+    return offset ? len + offset - transferred : 1;
+  }
+
+  std::shared_ptr<Stream> stream_;
+  ::google::protobuf::MessageLite *msg_;
+  std::array<char, MaxMessageSize> buf_;
+};
+
+template <class Stream>
+struct WriteDelimitedPBMessageContinuation : Continuation {
+  WriteDelimitedPBMessageContinuation(std::shared_ptr<Stream> stream,
+                                      const google::protobuf::MessageLite *msg)
+      : stream_(stream), msg_(msg) {}
+
+  virtual void Run(const Next &next) override {
+    bool success = true;
+    buf_ = SerializeDelimitedProtobufMessage(msg_, &success);
+
+    if(!success) {
+      next(Status::Error("Unable to serialize protobuf message."));
+      return;
+    }
+
+    asio::async_write(*stream_, asio::buffer(buf_), [next](const asio::error_code &ec, size_t) { next(ToStatus(ec)); } );
+  }
+
+private:
+  std::shared_ptr<Stream> stream_;
+  const google::protobuf::MessageLite *msg_;
+  std::string buf_;
+};
+
+template <class Stream, size_t MaxMessageSize = 512>
+static inline Continuation *
+ReadDelimitedPBMessage(std::shared_ptr<Stream> stream, ::google::protobuf::MessageLite *msg) {
+  return new ReadDelimitedPBMessageContinuation<Stream, MaxMessageSize>(stream,
+                                                                        msg);
+}
+
+template <class Stream>
+static inline Continuation *
+WriteDelimitedPBMessage(std::shared_ptr<Stream> stream, ::google::protobuf::MessageLite *msg) {
+  return new WriteDelimitedPBMessageContinuation<Stream>(stream, msg);
+}
+}
+}
+#endif

+ 61 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/fsinfo.cc

@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hdfspp/fsinfo.h>
+#include <sstream>
+#include <iomanip>
+
+namespace hdfs {
+
+FsInfo::FsInfo()
+  : capacity(0),
+    used(0),
+    remaining(0),
+    under_replicated(0),
+    corrupt_blocks(0),
+    missing_blocks(0),
+    missing_repl_one_blocks(0),
+    blocks_in_future(0) {
+}
+
+std::string FsInfo::str(const std::string fs_name) const {
+  std::string fs_name_label = "Filesystem";
+  std::string size = std::to_string(capacity);
+  std::string size_label = "Size";
+  std::string used = std::to_string(this->used);
+  std::string used_label = "Used";
+  std::string available = std::to_string(remaining);
+  std::string available_label = "Available";
+  std::string use_percentage = std::to_string(this->used * 100 / capacity) + "%";
+  std::string use_percentage_label = "Use%";
+  std::stringstream ss;
+  ss  << std::left << std::setw(std::max(fs_name.size(), fs_name_label.size())) << fs_name_label
+      << std::right << std::setw(std::max(size.size(), size_label.size()) + 2) << size_label
+      << std::right << std::setw(std::max(used.size(), used_label.size()) + 2) << used_label
+      << std::right << std::setw(std::max(available.size(), available_label.size()) + 2) << available_label
+      << std::right << std::setw(std::max(use_percentage.size(), use_percentage_label.size()) + 2) << use_percentage_label
+      << std::endl
+      << std::left << std::setw(std::max(fs_name.size(), fs_name_label.size())) << fs_name
+      << std::right << std::setw(std::max(size.size(), size_label.size()) + 2) << size
+      << std::right << std::setw(std::max(used.size(), used_label.size()) + 2) << used
+      << std::right << std::setw(std::max(available.size(), available_label.size()) + 2) << available
+      << std::right << std::setw(std::max(use_percentage.size(), use_percentage_label.size()) + 2) << use_percentage;
+  return ss.str();
+}
+
+}

+ 210 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.cc

@@ -0,0 +1,210 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/hdfs_configuration.h"
+#include "common/logging.h"
+
+#include <exception>
+
+#ifndef DEFAULT_SCHEME
+  #define DEFAULT_SCHEME "hdfs://"
+#endif
+
+namespace hdfs {
+
+// Constructs a configuration with no search path and no resources loaded
+HdfsConfiguration::HdfsConfiguration() : Configuration() {}
+
+// Constructs a configuration with a copy of the input data
+HdfsConfiguration::HdfsConfiguration(ConfigMap &src_map) : Configuration(src_map) {}
+HdfsConfiguration::HdfsConfiguration(const ConfigMap &src_map) : Configuration(src_map) {}
+
+std::vector<std::string> HdfsConfiguration::GetDefaultFilenames() {
+  auto result = Configuration::GetDefaultFilenames();
+  result.push_back("hdfs-site.xml");
+  return result;
+}
+
+// Sets a value iff the optional<T> has a value
+template <class T, class U>
+void OptionalSet(T& target, optional<U> value) {
+  if (value)
+    target = *value;
+}
+
+std::vector<std::string> SplitOnComma(const std::string &s, bool include_empty_strings) {
+  std::vector<std::string> res;
+  std::string buf;
+
+  for(unsigned int i=0;i<s.size();i++) {
+    char c = s[i];
+    if(c != ',') {
+      buf += c;
+    } else {
+      if(!include_empty_strings && buf.empty()) {
+        // Skip adding empty strings if needed
+        continue;
+      }
+      res.push_back(buf);
+      buf.clear();
+    }
+  }
+
+  if(buf.size() > 0)
+    res.push_back(buf);
+
+  return res;
+}
+
+std::string RemoveSpaces(const std::string &str) {
+  std::string res;
+  for(unsigned int i=0; i<str.size(); i++) {
+    char curr = str[i];
+    if(curr != ' ') {
+      res += curr;
+    }
+  }
+  return res;
+}
+
+// Prepend hdfs:// to string if there isn't already a scheme
+// Converts unset optional into empty string
+std::string PrependHdfsScheme(optional<std::string> str) {
+  if(!str)
+    return "";
+
+  if(str.value().find("://") == std::string::npos)
+    return DEFAULT_SCHEME + str.value();
+  return str.value();
+}
+
+// It's either use this, goto, or a lot of returns w/ status checks
+struct ha_parse_error : public std::exception {
+  std::string desc;
+  ha_parse_error(const std::string &val) : desc(val) {};
+  const char *what() const noexcept override  {
+    return desc.c_str();
+  };
+};
+
+std::vector<NamenodeInfo> HdfsConfiguration::LookupNameService(const std::string &nameservice) {
+  LOG_TRACE(kRPC, << "HDFSConfiguration@" << this << "::LookupNameService( nameservice=" << nameservice<< " ) called");
+
+  std::vector<NamenodeInfo> namenodes;
+  try {
+    // Find namenodes that belong to nameservice
+    std::vector<std::string> namenode_ids;
+    {
+      std::string service_nodes = std::string("dfs.ha.namenodes.") + nameservice;
+      optional<std::string> namenode_list = Get(service_nodes);
+      if(namenode_list)
+        namenode_ids = SplitOnComma(namenode_list.value(), false);
+      else
+        throw ha_parse_error("unable to find " + service_nodes);
+
+      for(unsigned int i=0; i<namenode_ids.size(); i++) {
+        namenode_ids[i] = RemoveSpaces(namenode_ids[i]);
+        LOG_INFO(kRPC, << "Namenode: " << namenode_ids[i]);
+      }
+    }
+
+    // should this error if we only find 1 NN?
+    if(namenode_ids.empty())
+      throw ha_parse_error("No namenodes found for nameservice " + nameservice);
+
+    // Get URI for each HA namenode
+    for(auto node_id=namenode_ids.begin(); node_id != namenode_ids.end(); node_id++) {
+      // find URI
+      std::string dom_node_name = std::string("dfs.namenode.rpc-address.") + nameservice + "." + *node_id;
+
+      URI uri;
+      try {
+        uri = URI::parse_from_string(PrependHdfsScheme(Get(dom_node_name)));
+      } catch (const uri_parse_error &) {
+        throw ha_parse_error("unable to find " + dom_node_name);
+      }
+
+      if(uri.str() == "") {
+        LOG_WARN(kRPC, << "Attempted to read info for nameservice " << nameservice << " node " << dom_node_name << " but didn't find anything.")
+      } else {
+        LOG_INFO(kRPC, << "Read the following HA Namenode URI from config" << uri.GetDebugString());
+      }
+
+      NamenodeInfo node(nameservice, *node_id, uri);
+      namenodes.push_back(node);
+    }
+  } catch (const ha_parse_error &e) {
+    LOG_ERROR(kRPC, << "HA cluster detected but failed because : " << e.what());
+    namenodes.clear(); // Don't return inconsistent view
+  }
+  return namenodes;
+}
+
+// Interprets the resources to build an Options object
+Options HdfsConfiguration::GetOptions() {
+  Options result;
+
+  OptionalSet(result.rpc_timeout, GetInt(kDfsClientSocketTimeoutKey));
+  OptionalSet(result.rpc_connect_timeout, GetInt(kIpcClientConnectTimeoutKey));
+  OptionalSet(result.max_rpc_retries, GetInt(kIpcClientConnectMaxRetriesKey));
+  OptionalSet(result.rpc_retry_delay_ms, GetInt(kIpcClientConnectRetryIntervalKey));
+  OptionalSet(result.defaultFS, GetUri(kFsDefaultFsKey));
+  OptionalSet(result.block_size, GetInt(kDfsBlockSizeKey));
+
+
+  OptionalSet(result.failover_max_retries, GetInt(kDfsClientFailoverMaxAttempts));
+  OptionalSet(result.failover_connection_max_retries, GetInt(kDfsClientFailoverConnectionRetriesOnTimeouts));
+
+  // Load all nameservices if it's HA configured
+  optional<std::string> dfs_nameservices = Get("dfs.nameservices");
+  if(dfs_nameservices) {
+    std::string nameservice = dfs_nameservices.value();
+
+    std::vector<std::string> all_services = SplitOnComma(nameservice, false);
+
+    // Look up nodes for each nameservice so that FileSystem object can support
+    // multiple nameservices by ID.
+    for(const std::string &service : all_services) {
+      if(service.empty())
+        continue;
+
+      LOG_DEBUG(kFileSystem, << "Parsing info for nameservice: " << service);
+      std::vector<NamenodeInfo> nodes = LookupNameService(service);
+      if(nodes.empty()) {
+        LOG_WARN(kFileSystem, << "Nameservice \"" << service << "\" declared in config but nodes aren't");
+      } else {
+        result.services[service] = nodes;
+      }
+    }
+  }
+
+  optional<std::string> authentication_value = Get(kHadoopSecurityAuthenticationKey);
+
+  if (authentication_value ) {
+      std::string fixed_case_value = fixCase(authentication_value.value());
+      if (fixed_case_value == fixCase(kHadoopSecurityAuthentication_kerberos))
+          result.authentication = Options::kKerberos;
+      else
+          result.authentication = Options::kSimple;
+  }
+
+  return result;
+}
+
+
+}
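
For orientation, here is a sketch of the HA configuration shape that LookupNameService() and GetOptions() above expect. The property keys come from the constants in this patch; the nameservice id, node ids, and host names are placeholders:

#include "common/hdfs_configuration.h"
#include "common/configuration_loader.h"

using namespace hdfs;

// Resolves an HA nameservice the same way GetOptions()/LookupNameService() do.
void ha_config_sketch() {
  const std::string xml =
      "<configuration>"
      "  <property><name>dfs.nameservices</name><value>ns1</value></property>"
      "  <property><name>dfs.ha.namenodes.ns1</name><value>nn1,nn2</value></property>"
      "  <property><name>dfs.namenode.rpc-address.ns1.nn1</name><value>host1:8020</value></property>"
      "  <property><name>dfs.namenode.rpc-address.ns1.nn2</name><value>host2:8020</value></property>"
      "</configuration>";

  ConfigurationLoader loader;
  auto conf = loader.Load<HdfsConfiguration>(xml);
  if (conf) {
    Options options = conf->GetOptions();
    // options.services["ns1"] now holds one NamenodeInfo entry each for nn1 and nn2.
    (void)options;
  }
}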

+ 70 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_configuration.h

@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_CONFIGURATION_H_
+#define COMMON_HDFS_CONFIGURATION_H_
+
+#include "common/configuration.h"
+#include "hdfspp/options.h"
+
+#include <string>
+#include <map>
+#include <vector>
+#include <set>
+#include <istream>
+#include <stdint.h>
+
+namespace hdfs {
+
+class HdfsConfiguration : public Configuration {
+  public:
+    // Interprets the resources to build an Options object
+    Options GetOptions();
+
+    // Keys to look for in the configuration file
+    static constexpr const char * kFsDefaultFsKey = "fs.defaultFS";
+    static constexpr const char * kDfsClientSocketTimeoutKey = "dfs.client.socket-timeout";
+    static constexpr const char * kIpcClientConnectTimeoutKey = "ipc.client.connect.timeout";
+    static constexpr const char * kIpcClientConnectMaxRetriesKey = "ipc.client.connect.max.retries";
+    static constexpr const char * kIpcClientConnectRetryIntervalKey = "ipc.client.connect.retry.interval";
+    static constexpr const char * kHadoopSecurityAuthenticationKey = "hadoop.security.authentication";
+    static constexpr const char * kHadoopSecurityAuthentication_simple = "simple";
+    static constexpr const char * kHadoopSecurityAuthentication_kerberos = "kerberos";
+    static constexpr const char * kDfsBlockSizeKey = "dfs.blocksize";
+
+    static constexpr const char * kDfsClientFailoverMaxAttempts = "dfs.client.failover.max.attempts";
+    static constexpr const char * kDfsClientFailoverConnectionRetriesOnTimeouts = "dfs.client.failover.connection.retries.on.timeouts";
+
+
+private:
+    friend class ConfigurationLoader;
+
+    // Constructs a configuration with no search path and no resources loaded
+    HdfsConfiguration();
+
+    // Constructs a configuration with some static data
+    HdfsConfiguration(ConfigMap &src_map);
+    HdfsConfiguration(const ConfigMap &src_map);
+
+    static std::vector<std::string> GetDefaultFilenames();
+    std::vector<NamenodeInfo> LookupNameService(const std::string &nameservice);
+};
+
+}
+
+#endif

+ 146 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.cc

@@ -0,0 +1,146 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfs_ioservice.h"
+
+#include <thread>
+#include <mutex>
+#include <vector>
+
+#include "common/logging.h"
+
+namespace hdfs {
+
+IoService::~IoService() {}
+
+IoService *IoService::New() {
+  return new IoServiceImpl();
+}
+
+std::shared_ptr<IoService> IoService::MakeShared() {
+  return std::make_shared<IoServiceImpl>();
+}
+
+
+unsigned int IoServiceImpl::InitDefaultWorkers() {
+  LOG_TRACE(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers@" << this << " called.");
+  unsigned int logical_thread_count = std::thread::hardware_concurrency();
+#ifndef DISABLE_CONCURRENT_WORKERS
+  if(logical_thread_count < 1) {
+    LOG_WARN(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers did not detect any logical processors.  Defaulting to 1 worker thread.");
+    logical_thread_count = 1;
+  } else {
+    LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers detected " << logical_thread_count << " logical threads and will spawn a worker for each.");
+  }
+#else
+  if(logical_thread_count > 0) {
+    LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl::InitDefaultWorkers: " << logical_thread_count << " threads available.  Concurrent workers are disabled so 1 worker thread will be used");
+  }
+  logical_thread_count = 1;
+#endif
+  return InitWorkers(logical_thread_count);
+}
+
+unsigned int IoServiceImpl::InitWorkers(unsigned int thread_count) {
+#ifdef DISABLE_CONCURRENT_WORKERS
+  LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl::InitWorkers: " << thread_count << " threads specified but concurrent workers are disabled so 1 will be used");
+  thread_count = 1;
+#endif
+  unsigned int created_threads = 0;
+  for(unsigned int i=0; i<thread_count; i++) {
+    bool created = AddWorkerThread();
+    if(created) {
+      created_threads++;
+    } else {
+      LOG_DEBUG(kAsyncRuntime, << "IoServiceImpl@" << this << " ::InitWorkers failed to create a worker thread");
+    }
+  }
+  if(created_threads != thread_count) {
+    LOG_WARN(kAsyncRuntime, << "IoServiceImpl@" << this << " ::InitWorkers attempted to create "
+                            << thread_count << " but only created " << created_threads
+                            << " worker threads.  Make sure this process has adequate resources.");
+  }
+  return created_threads;
+}
+
+bool IoServiceImpl::AddWorkerThread() {
+  mutex_guard state_lock(state_lock_);
+  auto async_worker = [this]() {
+    this->ThreadStartHook();
+    this->Run();
+    this->ThreadExitHook();
+  };
+  worker_threads_.push_back(WorkerPtr( new std::thread(async_worker)) );
+  return true;
+}
+
+
+void IoServiceImpl::ThreadStartHook() {
+  mutex_guard state_lock(state_lock_);
+  LOG_DEBUG(kAsyncRuntime, << "Worker thread #" << std::this_thread::get_id() << " for IoServiceImpl@" << this << " starting");
+}
+
+void IoServiceImpl::ThreadExitHook() {
+  mutex_guard state_lock(state_lock_);
+  LOG_DEBUG(kAsyncRuntime, << "Worker thread #" << std::this_thread::get_id() << " for IoServiceImpl@" << this << " exiting");
+}
+
+void IoServiceImpl::PostTask(std::function<void(void)>& asyncTask) {
+  io_service_.post(asyncTask);
+}
+
+void IoServiceImpl::WorkerDeleter::operator()(std::thread *t) {
+  // It is far too easy to destroy the filesystem (and thus the threadpool)
+  //     from within one of the worker threads, leading to a deadlock.  Let's
+  //     provide some explicit protection.
+  if(t->get_id() == std::this_thread::get_id()) {
+    LOG_ERROR(kAsyncRuntime, << "FileSystemImpl::WorkerDeleter::operator(treadptr="
+                             << t << ") : FATAL: Attempted to destroy a thread pool"
+                             "from within a callback of the thread pool!");
+  }
+  t->join();
+  delete t;
+}
+
+// As long as this just forwards to an asio::io_service method it doesn't need a lock
+void IoServiceImpl::Run() {
+  // The IoService executes callbacks provided by library users in the context of worker threads,
+  // there is no way of preventing those callbacks from throwing but we can at least prevent them
+  // from escaping this library and crashing the process.
+
+  // As recommended in http://www.boost.org/doc/libs/1_39_0/doc/html/boost_asio/reference/io_service.html#boost_asio.reference.io_service.effect_of_exceptions_thrown_from_handlers
+  asio::io_service::work work(io_service_);
+  while(true)
+  {
+    try
+    {
+      io_service_.run();
+      break;
+    } catch (const std::exception & e) {
+      LOG_WARN(kFileSystem, << "Unexpected exception in libhdfspp worker thread: " << e.what());
+    } catch (...) {
+      LOG_WARN(kFileSystem, << "Unexpected value not derived from std::exception in libhdfspp worker thread");
+    }
+  }
+}
+
+unsigned int IoServiceImpl::get_worker_thread_count() {
+  mutex_guard state_lock(state_lock_);
+  return worker_threads_.size();
+}
+
+}

+ 79 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/hdfs_ioservice.h

@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_IOSERVICE_H_
+#define COMMON_HDFS_IOSERVICE_H_
+
+#include "hdfspp/hdfspp.h"
+
+#include <asio/io_service.hpp>
+#include "common/util.h"
+
+#include <mutex>
+#include <thread>
+
+namespace hdfs {
+
+// Uncomment this to determine if issues are due to concurrency or logic faults
+// If tests still fail with concurrency disabled it's most likely a logic bug
+//#define DISABLE_CONCURRENT_WORKERS
+
+/*
+ *  A thin wrapper over the asio::io_service with a few extras
+ *    -manages its own worker threads
+ *    -some helpers for sharing with multiple modules that need to do async work
+ */
+
+class IoServiceImpl : public IoService {
+ public:
+  IoServiceImpl() {}
+
+  virtual unsigned int InitDefaultWorkers() override;
+  virtual unsigned int InitWorkers(unsigned int thread_count) override;
+  virtual void PostTask(std::function<void(void)>& asyncTask) override;
+  virtual void Run() override;
+  virtual void Stop() override { io_service_.stop(); }
+
+  // Add a single worker thread, in the common case try to avoid this in favor
+  // of Init[Default]Workers. Public for use by tests and rare cases where a
+  // client wants very explicit control of threading for performance reasons
+  // e.g. pinning threads to NUMA nodes.
+  bool AddWorkerThread();
+
+  // Be very careful about using this: HDFS-10241
+  ::asio::io_service &io_service() { return io_service_; }
+  unsigned int get_worker_thread_count();
+ private:
+  std::mutex state_lock_;
+  ::asio::io_service io_service_;
+
+  // For doing logging + resource manager updates on thread start/exit
+  void ThreadStartHook();
+  void ThreadExitHook();
+
+  // Support for async worker threads
+  struct WorkerDeleter {
+    void operator()(std::thread *t);
+  };
+  typedef std::unique_ptr<std::thread, WorkerDeleter> WorkerPtr;
+  std::vector<WorkerPtr> worker_threads_;
+};
+
+}
+
+#endif
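
A short sketch of how this wrapper is intended to be driven, based only on the methods overridden above (the base IoService interface itself is declared in hdfspp/hdfspp.h, which is outside this hunk); the task body is a placeholder:

#include "common/hdfs_ioservice.h"

#include <functional>
#include <memory>

using namespace hdfs;

void ioservice_sketch() {
  std::shared_ptr<IoService> io = IoService::MakeShared();  // concrete type is IoServiceImpl
  unsigned int workers = io->InitDefaultWorkers();          // one worker per logical core (unless disabled)
  (void)workers;

  // PostTask takes an lvalue reference, so the task needs a named std::function.
  std::function<void(void)> task = []() { /* async work runs on a worker thread */ };
  io->PostTask(task);

  io->Stop();   // signals the worker threads to return from Run() and exit
}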

+ 89 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.cc

@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "libhdfs_events_impl.h"
+
+#include <exception>
+
+namespace hdfs {
+
+/**
+ * Default no-op callback implementations
+ **/
+
+LibhdfsEvents::LibhdfsEvents() : fs_callback(std::experimental::nullopt),
+                                 file_callback(std::experimental::nullopt)
+{}
+
+LibhdfsEvents::~LibhdfsEvents() {}
+
+void LibhdfsEvents::set_fs_callback(const fs_event_callback & callback) {
+  fs_callback = callback;
+}
+
+void LibhdfsEvents::set_file_callback(const file_event_callback & callback) {
+  file_callback = callback;
+}
+
+void LibhdfsEvents::clear_fs_callback() {
+  fs_callback = std::experimental::nullopt;
+}
+
+void LibhdfsEvents::clear_file_callback() {
+  file_callback = std::experimental::nullopt;
+}
+
+event_response LibhdfsEvents::call(const char * event,
+                                   const char * cluster,
+                                   int64_t value)
+{
+  if (fs_callback) {
+    try {
+      return fs_callback->operator()(event, cluster, value);
+    } catch (const std::exception& e) {
+      return event_response::make_caught_std_exception(e.what());
+    } catch (...) {
+      // Arguably calling abort() here would serve as appropriate
+      // punishment for those who throw garbage that isn't derived
+      // from std::exception...
+      return event_response::make_caught_unknown_exception();
+    }
+  } else {
+    return event_response::make_ok();
+  }
+}
+
+event_response LibhdfsEvents::call(const char * event,
+                                   const char * cluster,
+                                   const char * file,
+                                   int64_t value)
+{
+  if (file_callback) {
+    try {
+      return file_callback->operator()(event, cluster, file, value);
+    } catch (const std::exception& e) {
+      return event_response::make_caught_std_exception(e.what());
+    } catch (...) {
+      return event_response::make_caught_unknown_exception();
+    }
+  } else {
+    return event_response::make_ok();
+  }
+}
+
+}

+ 59 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/libhdfs_events_impl.h

@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBHDFSPP_COMMON_LIBHDFSEVENTS_IMPL
+#define LIBHDFSPP_COMMON_LIBHDFSEVENTS_IMPL
+
+#include "hdfspp/events.h"
+#include "common/optional_wrapper.h"
+
+#include <functional>
+
+namespace hdfs {
+
+/**
+ * Users can specify event handlers.  Default is a no-op handler.
+ **/
+class LibhdfsEvents {
+public:
+  LibhdfsEvents();
+  virtual ~LibhdfsEvents();
+
+  void set_fs_callback(const fs_event_callback & callback);
+  void set_file_callback(const file_event_callback & callback);
+  void clear_fs_callback();
+  void clear_file_callback();
+
+  event_response call(const char *event,
+                      const char *cluster,
+                      int64_t value);
+
+  event_response call(const char *event,
+                      const char *cluster,
+                      const char *file,
+                      int64_t value);
+private:
+  // Called when fs events occur
+  std::experimental::optional<fs_event_callback> fs_callback;
+
+  // Called when file events occur
+  std::experimental::optional<file_event_callback> file_callback;
+};
+
+}
+#endif

+ 100 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/locks.cc

@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/locks.h"
+
+#include <mutex>
+
+
+namespace hdfs {
+
+LockGuard::LockGuard(Mutex *m) : _mtx(m) {
+  if(!m) {
+    throw LockFailure("LockGuard passed invalid (null) Mutex pointer");
+  }
+  _mtx->lock();
+}
+
+LockGuard::~LockGuard() {
+  if(_mtx) {
+    _mtx->unlock();
+  }
+}
+
+
+// Basic mutexes to use as default.  Just a wrapper around C++11 std::mutex.
+class DefaultMutex : public Mutex {
+ public:
+  DefaultMutex() {}
+
+  void lock() override {
+    // Could throw in here if the implementation couldn't lock for some reason.
+    _mtx.lock();
+  }
+
+  void unlock() override {
+    _mtx.unlock();
+  }
+
+  std::string str() override {
+    return "DefaultMutex";
+  }
+ private:
+  std::mutex _mtx;
+};
+
+DefaultMutex defaultTestMutex;
+DefaultMutex defaultGssapiMutex;
+
+// LockManager static var instantiation
+Mutex *LockManager::TEST_default_mutex = &defaultTestMutex;
+Mutex *LockManager::gssapiMtx = &defaultGssapiMutex;
+std::mutex LockManager::_state_lock;
+bool LockManager::_finalized = false;
+
+bool LockManager::InitLocks(Mutex *gssapi) {
+  std::lock_guard<std::mutex> guard(_state_lock);
+
+  // You get one shot to set this - swapping the locks
+  // out while in use gets risky.  It can still be done by
+  // using the Mutex as a proxy object if one understands
+  // the implied risk of doing so.
+  if(_finalized)
+    return false;
+
+  gssapiMtx = gssapi;
+  _finalized = true;
+  return true;
+}
+
+Mutex *LockManager::getGssapiMutex() {
+  std::lock_guard<std::mutex> guard(_state_lock);
+  return gssapiMtx;
+}
+
+Mutex *LockManager::TEST_get_default_mutex() {
+  return TEST_default_mutex;
+}
+
+void LockManager::TEST_reset_manager() {
+  _finalized = false;
+  // user still responsible for cleanup
+  gssapiMtx = &defaultGssapiMutex;
+}
+
+} // end namespace hdfs
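
A sketch of supplying a custom GSSAPI lock before using the library, assuming hdfspp/locks.h declares Mutex with the virtual lock()/unlock()/str() methods that DefaultMutex overrides above:

    #include "hdfspp/locks.h"

    #include <mutex>
    #include <string>

    class MyGssapiMutex : public hdfs::Mutex {
     public:
      void lock() override   { mtx_.lock(); }
      void unlock() override { mtx_.unlock(); }
      std::string str() override { return "MyGssapiMutex"; }
     private:
      std::mutex mtx_;
    };

    void example_install_gssapi_lock() {
      static MyGssapiMutex gssapi_mutex;

      // Only the first call takes effect; once finalized, InitLocks returns false.
      hdfs::LockManager::InitLocks(&gssapi_mutex);

      // LockGuard provides RAII locking over any hdfs::Mutex.
      hdfs::LockGuard guard(hdfs::LockManager::getGssapiMutex());
    }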

+ 227 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.cc

@@ -0,0 +1,227 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "logging.h"
+
+#include <ctime>
+#include <cstring>
+#include <thread>
+#include <iostream>
+#include <sstream>
+
+namespace hdfs
+{
+
+LogManager::LogManager() {}
+std::unique_ptr<LoggerInterface> LogManager::logger_impl_(new StderrLogger());
+std::mutex LogManager::impl_lock_;
+uint32_t LogManager::component_mask_ = 0xFFFFFFFF;
+uint32_t LogManager::level_threshold_ = kWarning;
+
+void LogManager::DisableLogForComponent(LogSourceComponent c) {
+  // AND with all bits other than one we want to unset
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  component_mask_ &= ~c;
+}
+
+void LogManager::EnableLogForComponent(LogSourceComponent c) {
+  // OR with bit to set
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  component_mask_ |= c;
+}
+
+void LogManager::SetLogLevel(LogLevel level) {
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  level_threshold_ = level;
+}
+
+void LogManager::Write(const LogMessage& msg) {
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  if(logger_impl_)
+    logger_impl_->Write(msg);
+}
+
+void LogManager::SetLoggerImplementation(std::unique_ptr<LoggerInterface> impl) {
+  std::lock_guard<std::mutex> impl_lock(impl_lock_);
+  logger_impl_.reset(impl.release());
+}
+
+
+/**
+ *  Simple plugin to dump logs to stderr
+ **/
+void StderrLogger::Write(const LogMessage& msg) {
+  std::stringstream formatted;
+
+  if(show_level_)
+    formatted << msg.level_string();
+
+  if(show_component_)
+    formatted << msg.component_string();
+
+  if(show_timestamp_) {
+    time_t current_time = std::time(nullptr);
+    char timestr[128];
+    memset(timestr, 0, 128);
+    int res = std::strftime(timestr, 128, "%a %b %e %H:%M:%S %Y", std::localtime(&current_time));
+    if(res > 0) {
+      formatted << '[' << (const char*)timestr << ']';
+    } else {
+      formatted << "[Error formatting timestamp]";
+    }
+  }
+
+  if(show_thread_) {
+    formatted << "[Thread id = " << std::this_thread::get_id() << ']';
+  }
+
+  if(show_file_) {
+    //  __FILE__ contains absolute path, which is giant if doing a build inside the
+    //  Hadoop tree.  Trim down to relative to libhdfspp/
+    std::string abs_path(msg.file_name());
+    size_t rel_path_idx = abs_path.find("libhdfspp/");
+    //  Default to whole string if library is being built in an odd way
+    if(rel_path_idx == std::string::npos)
+      rel_path_idx = 0;
+
+    formatted << '[' << (const char*)&abs_path[rel_path_idx] << ":" << msg.file_line() << ']';
+  }
+
+  std::cerr << formatted.str() << "    " << msg.MsgString() << std::endl;
+}
+
+void StderrLogger::set_show_timestamp(bool show) {
+  show_timestamp_ = show;
+}
+void StderrLogger::set_show_level(bool show) {
+  show_level_ = show;
+}
+void StderrLogger::set_show_thread(bool show) {
+  show_thread_ = show;
+}
+void StderrLogger::set_show_component(bool show) {
+  show_component_ = show;
+}
+
+
+LogMessage::~LogMessage() {
+  LogManager::Write(*this);
+}
+
+LogMessage& LogMessage::operator<<(const std::string *str) {
+  if(str)
+    msg_buffer_ << *str;
+  else
+    msg_buffer_ << "<nullptr>";
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(const std::string& str) {
+  msg_buffer_ << str;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(const ::asio::ip::tcp::endpoint& endpoint) {
+  msg_buffer_ << endpoint;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(const char *str) {
+  if(str)
+    msg_buffer_ << str;
+  else
+    msg_buffer_ << "<nullptr>";
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(bool val) {
+  if(val)
+    msg_buffer_ << "true";
+  else
+    msg_buffer_ << "false";
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(int32_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(uint32_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(int64_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(uint64_t val) {
+  msg_buffer_ << val;
+  return *this;
+}
+
+LogMessage& LogMessage::operator<<(void *ptr) {
+  msg_buffer_ << ptr;
+  return *this;
+}
+
+
+LogMessage& LogMessage::operator<<(const std::thread::id& tid) {
+  msg_buffer_ << tid;
+  return *this;
+}
+
+std::string LogMessage::MsgString() const {
+  return msg_buffer_.str();
+}
+
+const char * kLevelStrings[5] = {
+  "[TRACE ]",
+  "[DEBUG ]",
+  "[INFO  ]",
+  "[WARN  ]",
+  "[ERROR ]"
+};
+
+const char * LogMessage::level_string() const {
+  return kLevelStrings[level_];
+}
+
+const char * kComponentStrings[6] = {
+  "[Unknown       ]",
+  "[RPC           ]",
+  "[BlockReader   ]",
+  "[FileHandle    ]",
+  "[FileSystem    ]",
+  "[Async Runtime ]",
+};
+
+const char * LogMessage::component_string() const {
+  switch(component_) {
+    case kRPC: return kComponentStrings[1];
+    case kBlockReader: return kComponentStrings[2];
+    case kFileHandle: return kComponentStrings[3];
+    case kFileSystem: return kComponentStrings[4];
+    case kAsyncRuntime: return kComponentStrings[5];
+    default: return kComponentStrings[0];
+  }
+}
+
+}

+ 217 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/logging.h

@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIB_COMMON_LOGGING_H_
+#define LIB_COMMON_LOGGING_H_
+
+#include <asio/ip/tcp.hpp>
+
+#include "hdfspp/log.h"
+
+#include <iostream>
+#include <sstream>
+#include <mutex>
+#include <memory>
+#include <thread>
+
+#include <asio/ip/tcp.hpp>
+
+namespace hdfs {
+
+/**
+ *  Logging mechanism to provide lightweight logging to stderr as well as
+ *  a callback mechanism to allow C clients and larger third-party libs
+ *  to handle logging.  When adding a new log message to the
+ *  library use the macros defined below (LOG_TRACE..LOG_ERROR) rather than
+ *  using the LogMessage and LogManager objects directly.
+ **/
+
+enum LogLevel {
+  kTrace     = 0,
+  kDebug     = 1,
+  kInfo      = 2,
+  kWarning   = 3,
+  kError     = 4,
+};
+
+enum LogSourceComponent {
+  kUnknown      = 1 << 0,
+  kRPC          = 1 << 1,
+  kBlockReader  = 1 << 2,
+  kFileHandle   = 1 << 3,
+  kFileSystem   = 1 << 4,
+  kAsyncRuntime = 1 << 5,
+};
+
+#define LOG_TRACE(C, MSG) do { \
+if(LogManager::ShouldLog(kTrace,C)) { \
+  LogMessage(kTrace, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+
+#define LOG_DEBUG(C, MSG) do { \
+if(LogManager::ShouldLog(kDebug,C)) { \
+  LogMessage(kDebug, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+#define LOG_INFO(C, MSG) do { \
+if(LogManager::ShouldLog(kInfo,C)) { \
+  LogMessage(kInfo, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+#define LOG_WARN(C, MSG) do { \
+if(LogManager::ShouldLog(kWarning,C)) { \
+  LogMessage(kWarning, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+#define LOG_ERROR(C, MSG) do { \
+if(LogManager::ShouldLog(kError,C)) { \
+  LogMessage(kError, __FILE__, __LINE__, C) MSG; \
+}} while (0);
+
+
+class LogMessage;
+
+class LoggerInterface {
+ public:
+  LoggerInterface() {};
+  virtual ~LoggerInterface() {};
+
+  /**
+   *  User-defined handling of messages; the common case would be printing them somewhere.
+   **/
+  virtual void Write(const LogMessage& msg) = 0;
+};
+
+/**
+ *  StderrLogger unsurprisingly dumps messages to stderr.
+ *  This is the default logger if nothing else is explicitly set.
+ **/
+class StderrLogger : public LoggerInterface {
+ public:
+  StderrLogger() : show_timestamp_(true), show_level_(true),
+                   show_thread_(true), show_component_(true),
+                   show_file_(true) {}
+  void Write(const LogMessage& msg);
+  void set_show_timestamp(bool show);
+  void set_show_level(bool show);
+  void set_show_thread(bool show);
+  void set_show_component(bool show);
+ private:
+  bool show_timestamp_;
+  bool show_level_;
+  bool show_thread_;
+  bool show_component_;
+  bool show_file_;
+};
+
+
+/**
+ *  LogManager provides a thread safe static interface to the underlying
+ *  logger implementation.
+ **/
+class LogManager {
+ friend class LogMessage;
+ public:
+  //  allow easy inlining
+  static bool ShouldLog(LogLevel level, LogSourceComponent source) {
+    std::lock_guard<std::mutex> impl_lock(impl_lock_);
+    if(level < level_threshold_)
+      return false;
+    if(!(source & component_mask_))
+      return false;
+    return true;
+  }
+  static void Write(const LogMessage & msg);
+  static void EnableLogForComponent(LogSourceComponent c);
+  static void DisableLogForComponent(LogSourceComponent c);
+  static void SetLogLevel(LogLevel level);
+  static void SetLoggerImplementation(std::unique_ptr<LoggerInterface> impl);
+
+ private:
+  // don't create instances of this
+  LogManager();
+  // synchronize all unsafe plugin calls
+  static std::mutex impl_lock_;
+  static std::unique_ptr<LoggerInterface> logger_impl_;
+  // component and level masking
+  static uint32_t component_mask_;
+  static uint32_t level_threshold_;
+};
+
+/**
+ *  LogMessage contains message text, along with other metadata about the message.
+ *  Note:  For performance reasons a set of macros (see top of file) is used to
+ *  create these inside of an if block.  Do not instantiate these directly; doing
+ *  so will cause the message to be unconditionally logged.  This minor inconvenience
+ *  gives us a ~20% performance increase in the (common) case where few messages
+ *  are worth logging; std::stringstream is expensive to construct.
+ **/
+class LogMessage {
+ friend class LogManager;
+ public:
+  LogMessage(const LogLevel &l, const char *file, int line,
+             LogSourceComponent component = kUnknown) :
+             level_(l), component_(component), origin_file_(file), origin_line_(line){}
+
+  ~LogMessage();
+
+  const char *level_string() const;
+  const char *component_string() const;
+  LogLevel level() const {return level_; }
+  LogSourceComponent component() const {return component_; }
+  int file_line() const {return origin_line_; }
+  const char * file_name() const {return origin_file_; }
+
+  //print as-is, indicates when a nullptr was passed in
+  LogMessage& operator<<(const char *);
+  LogMessage& operator<<(const std::string*);
+  LogMessage& operator<<(const std::string&);
+
+  //convert to a string "true"/"false"
+  LogMessage& operator<<(bool);
+
+  //integral types
+  LogMessage& operator<<(int32_t);
+  LogMessage& operator<<(uint32_t);
+  LogMessage& operator<<(int64_t);
+  LogMessage& operator<<(uint64_t);
+
+  //print address as hex
+  LogMessage& operator<<(void *);
+
+  //asio types
+  LogMessage& operator<<(const ::asio::ip::tcp::endpoint& endpoint);
+
+  //thread and mutex types
+  LogMessage& operator<<(const std::thread::id& tid);
+
+
+  std::string MsgString() const;
+
+ private:
+  LogLevel level_;
+  LogSourceComponent component_;
+  const char *origin_file_;
+  const int origin_line_;
+  std::stringstream msg_buffer_;
+};
+
+}
+
+#endif
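
A sketch of the intended call pattern for the macros and LogManager above; StderrLogger stands in for any LoggerInterface implementation:

    #include "common/logging.h"

    #include <memory>
    #include <utility>

    void example_logging() {
      using namespace hdfs;

      // Emit only warnings and errors, and silence the FileHandle component.
      LogManager::SetLogLevel(kWarning);
      LogManager::DisableLogForComponent(kFileHandle);

      int attempts = 3;
      LOG_WARN(kRPC, << "connection retry, attempts=" << attempts);

      // Route messages to a custom sink (here just another StderrLogger).
      std::unique_ptr<LoggerInterface> sink(new StderrLogger());
      LogManager::SetLoggerImplementation(std::move(sink));
    }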

+ 178 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.cc

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "namenode_info.h"
+
+#include "common/util.h"
+#include "common/logging.h"
+
+#include <sstream>
+#include <utility>
+#include <future>
+#include <memory>
+
+namespace hdfs {
+
+ResolvedNamenodeInfo& ResolvedNamenodeInfo::operator=(const NamenodeInfo &info) {
+  nameservice = info.nameservice;
+  name = info.name;
+  uri = info.uri;
+  return *this;
+}
+
+
+
+std::string ResolvedNamenodeInfo::str() const {
+  std::stringstream ss;
+  ss << "ResolvedNamenodeInfo {nameservice: " << nameservice << ", name: " << name << ", uri: " << uri.str();
+  ss << ", host: " << uri.get_host();
+
+  if(uri.has_port())
+    ss << ", port: " << uri.get_port();
+  else
+    ss << ", invalid port (uninitialized)";
+
+  ss << ", scheme: " << uri.get_scheme();
+
+  ss << " [";
+  for(unsigned int i=0;i<endpoints.size();i++)
+    ss << endpoints[i] << " ";
+  ss << "] }";
+
+  return ss.str();
+}
+
+
+bool ResolveInPlace(::asio::io_service *ioservice, ResolvedNamenodeInfo &info) {
+  // this isn't very memory friendly, but if it needs to be called often there are bigger issues at hand
+  info.endpoints.clear();
+  std::vector<ResolvedNamenodeInfo> resolved = BulkResolve(ioservice, {info});
+  if(resolved.size() != 1)
+    return false;
+
+  info.endpoints = resolved[0].endpoints;
+  if(info.endpoints.size() == 0)
+    return false;
+  return true;
+}
+
+typedef std::vector<asio::ip::tcp::endpoint> endpoint_vector;
+
+// RAII wrapper
+class ScopedResolver {
+ private:
+  ::asio::io_service *io_service_;
+  std::string host_;
+  std::string port_;
+  ::asio::ip::tcp::resolver::query query_;
+  ::asio::ip::tcp::resolver resolver_;
+  endpoint_vector endpoints_;
+
+  // Caller blocks on access if resolution isn't finished
+  std::shared_ptr<std::promise<Status>> result_status_;
+ public:
+  ScopedResolver(::asio::io_service *service, const std::string &host, const std::string &port) :
+        io_service_(service), host_(host), port_(port), query_(host, port), resolver_(*io_service_)
+  {
+    if(!io_service_)
+      LOG_ERROR(kAsyncRuntime, << "ScopedResolver@" << this << " passed nullptr to io_service");
+  }
+
+  ~ScopedResolver() {
+    resolver_.cancel();
+  }
+
+  bool BeginAsyncResolve() {
+    // result_status_ would only exist if this was previously called.  Invalid state.
+    if(result_status_) {
+      LOG_ERROR(kAsyncRuntime, << "ScopedResolver@" << this << "::BeginAsyncResolve invalid call: may only be called once per instance");
+      return false;
+    } else if(!io_service_) {
+      LOG_ERROR(kAsyncRuntime, << "ScopedResolver@" << this << "::BeginAsyncResolve invalid call: null io_service");
+      return false;
+    }
+
+    // Now set up the promise, set it in async_resolve's callback
+    result_status_ = std::make_shared<std::promise<Status>>();
+    std::shared_ptr<std::promise<Status>> shared_result = result_status_;
+
+    // Callback to pull a copy of endpoints out of resolver and set promise
+    auto callback = [this, shared_result](const asio::error_code &ec, ::asio::ip::tcp::resolver::iterator out) {
+      if(!ec) {
+        std::copy(out, ::asio::ip::tcp::resolver::iterator(), std::back_inserter(endpoints_));
+      }
+      shared_result->set_value( ToStatus(ec) );
+    };
+    resolver_.async_resolve(query_, callback);
+    return true;
+  }
+
+  Status Join() {
+    if(!result_status_) {
+      std::ostringstream errmsg;
+      errmsg <<  "ScopedResolver@" << this << "::Join invalid call: promise never set";
+      return Status::InvalidArgument(errmsg.str().c_str());
+    }
+
+    std::future<Status> future_result = result_status_->get_future();
+    Status res = future_result.get();
+    return res;
+  }
+
+  endpoint_vector GetEndpoints() {
+    // Explicitly return by value to decouple lifecycles.
+    return endpoints_;
+  }
+};
+
+std::vector<ResolvedNamenodeInfo> BulkResolve(::asio::io_service *ioservice, const std::vector<NamenodeInfo> &nodes) {
+  std::vector< std::unique_ptr<ScopedResolver> > resolvers;
+  resolvers.reserve(nodes.size());
+
+  std::vector<ResolvedNamenodeInfo> resolved_info;
+  resolved_info.reserve(nodes.size());
+
+  for(unsigned int i=0; i<nodes.size(); i++) {
+    std::string host = nodes[i].get_host();
+    std::string port = nodes[i].get_port();
+
+    resolvers.emplace_back(new ScopedResolver(ioservice, host, port));
+    resolvers[i]->BeginAsyncResolve();
+  }
+
+  // Join all async operations
+  for(unsigned int i=0; i < resolvers.size(); i++) {
+    Status asyncReturnStatus = resolvers[i]->Join();
+
+    ResolvedNamenodeInfo info;
+    info = nodes[i];
+
+    if(asyncReturnStatus.ok()) {
+      // Copy out endpoints if things went well
+      info.endpoints = resolvers[i]->GetEndpoints();
+    } else {
+      LOG_ERROR(kAsyncRuntime, << "Unable to resolve endpoints for host: " << nodes[i].get_host()
+                                                               << " port: " << nodes[i].get_port());
+    }
+
+    resolved_info.push_back(info);
+  }
+  return resolved_info;
+}
+
+}

+ 49 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/namenode_info.h

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_NAMENODE_INFO_H_
+#define COMMON_HDFS_NAMENODE_INFO_H_
+
+#include <asio.hpp>
+#include <hdfspp/options.h>
+
+#include <string>
+#include <vector>
+
+namespace hdfs {
+
+// Internal representation of namenode info that keeps track
+// of its endpoints.
+struct ResolvedNamenodeInfo : public NamenodeInfo {
+  ResolvedNamenodeInfo& operator=(const NamenodeInfo &info);
+  std::string str() const;
+
+  std::vector<::asio::ip::tcp::endpoint> endpoints;
+};
+
+// Resolve the hosts of all the given namenodes in parallel.
+// Entries that fail to resolve are returned with empty endpoint lists.
+std::vector<ResolvedNamenodeInfo> BulkResolve(::asio::io_service *ioservice, const std::vector<NamenodeInfo> &nodes);
+
+// Clear endpoints, if any, and resolve them again
+// Return true if endpoints were resolved
+bool ResolveInPlace(::asio::io_service *ioservice, ResolvedNamenodeInfo &info);
+
+}
+
+#endif
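
A sketch of resolving a set of namenodes with the helpers declared above; it assumes the asio::io_service is being run by a worker thread so the asynchronous lookups can complete:

    #include "common/namenode_info.h"

    std::vector<hdfs::ResolvedNamenodeInfo> example_resolve(
        ::asio::io_service *io_service,
        const std::vector<hdfs::NamenodeInfo> &namenodes) {
      // Entries that fail DNS resolution come back with empty endpoint lists.
      std::vector<hdfs::ResolvedNamenodeInfo> resolved =
          hdfs::BulkResolve(io_service, namenodes);

      for (const hdfs::ResolvedNamenodeInfo &info : resolved) {
        if (info.endpoints.empty()) {
          // info.str() includes host, port, scheme, and endpoints for diagnostics.
        }
      }
      return resolved;
    }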

+ 52 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/new_delete.h

@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_HDFS_NEW_DELETE_H_
+#define COMMON_HDFS_NEW_DELETE_H_
+
+#include <cstring>
+#include <cstdlib>
+
+struct mem_struct {
+  size_t mem_size;
+};
+
+#ifndef NDEBUG
+#define MEMCHECKED_CLASS(clazz) \
+static void* operator new(size_t size) { \
+  void* p = ::malloc(size); \
+  return p; \
+} \
+static void* operator new[](size_t size) { \
+  mem_struct* p = (mem_struct*)::malloc(sizeof(mem_struct) + size); \
+  p->mem_size = size; \
+  return (void*)++p; \
+} \
+static void operator delete(void* p) { \
+  ::memset(p, 0, sizeof(clazz)); \
+  ::free(p); \
+} \
+static void operator delete[](void* p) { \
+  mem_struct* header = (mem_struct*)p; \
+  size_t size = (--header)->mem_size; \
+  ::memset(p, 0, size); \
+  ::free(header); \
+}
+#else
+#define MEMCHECKED_CLASS(clazz)
+#endif
+#endif
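
A sketch of applying the macro above to a class; in debug builds (NDEBUG undefined) deleted objects are zeroed before being freed, which makes use-after-delete bugs easier to spot:

    #include "common/new_delete.h"

    class ExampleHandle {
     public:
      MEMCHECKED_CLASS(ExampleHandle)
      ExampleHandle() : value_(42) {}
      int value() const { return value_; }
     private:
      int value_;
    };

    void example_memchecked() {
      ExampleHandle *h = new ExampleHandle();  // class-local operator new via ::malloc
      delete h;                                // zeroes sizeof(ExampleHandle) bytes, then frees
    }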

+ 43 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/optional_wrapper.h

@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_OPTIONAL_WRAPPER_H_
+#define COMMON_OPTIONAL_WRAPPER_H_
+
+#ifdef __clang__
+  #pragma clang diagnostic push
+  #if __has_warning("-Wweak-vtables")
+    #pragma clang diagnostic ignored "-Wweak-vtables"
+  #endif
+  #if __has_warning("-Wreserved-id-macro")
+    #pragma clang diagnostic ignored "-Wreserved-id-macro"
+  #endif
+  #if __has_warning("-Wextra-semi")
+    #pragma clang diagnostic ignored "-Wextra-semi"
+  #endif
+  #define TR2_OPTIONAL_DISABLE_EMULATION_OF_TYPE_TRAITS  //For Clang < 3_4_2
+#endif
+
+#include <optional.hpp>
+
+#ifdef __clang__
+  #undef TR2_OPTIONAL_DISABLE_EMULATION_OF_TYPE_TRAITS  //For Clang < 3_4_2
+  #pragma clang diagnostic pop
+#endif
+
+#endif //COMMON_OPTIONAL_WRAPPER_H_
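
The wrapper above is meant to be included in place of <optional.hpp> directly so the clang pragmas are always applied; a minimal sketch:

    #include "common/optional_wrapper.h"

    std::experimental::optional<int> example_lookup(bool found) {
      if (found)
        return 42;
      return std::experimental::nullopt;
    }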

+ 61 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/options.cc

@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/options.h"
+
+namespace hdfs {
+
+// The linker needs a place to put all of those constants
+const int Options::kDefaultRpcTimeout;
+const int Options::kNoRetry;
+const int Options::kDefaultMaxRpcRetries;
+const int Options::kDefaultRpcRetryDelayMs;
+const unsigned int Options::kDefaultHostExclusionDuration;
+const unsigned int Options::kDefaultFailoverMaxRetries;
+const unsigned int Options::kDefaultFailoverConnectionMaxRetries;
+const long Options::kDefaultBlockSize;
+
+Options::Options() : rpc_timeout(kDefaultRpcTimeout),
+                     rpc_connect_timeout(kDefaultRpcConnectTimeout),
+                     max_rpc_retries(kDefaultMaxRpcRetries),
+                     rpc_retry_delay_ms(kDefaultRpcRetryDelayMs),
+                     host_exclusion_duration(kDefaultHostExclusionDuration),
+                     defaultFS(),
+                     failover_max_retries(kDefaultFailoverMaxRetries),
+                     failover_connection_max_retries(kDefaultFailoverConnectionMaxRetries),
+                     authentication(kDefaultAuthentication),
+                     block_size(kDefaultBlockSize),
+                     io_threads_(kDefaultIoThreads)
+{
+
+}
+
+std::string NamenodeInfo::get_host() const {
+  return uri.get_host();
+}
+
+std::string NamenodeInfo::get_port() const {
+  if(uri.has_port()) {
+    return std::to_string(uri.get_port());
+  }
+  return "-1";
+}
+
+
+
+}
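
A sketch of adjusting the defaults established in the constructor above; the members are assumed to be public fields of the Options struct in hdfspp/options.h, and the timeout unit is assumed to be milliseconds:

    #include "hdfspp/options.h"

    hdfs::Options example_options() {
      hdfs::Options options;              // starts from the kDefault* values above
      options.rpc_timeout = 30000;        // assumed milliseconds
      options.max_rpc_retries = 2;
      options.failover_max_retries = 4;
      return options;
    }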

+ 87 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc

@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common/retry_policy.h"
+#include "common/logging.h"
+
+#include <sstream>
+
+namespace hdfs {
+
+RetryAction FixedDelayRetryPolicy::ShouldRetry(
+    const Status &s, uint64_t retries, uint64_t failovers,
+    bool isIdempotentOrAtMostOnce) const {
+  LOG_TRACE(kRPC, << "FixedDelayRetryPolicy::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
+  (void)isIdempotentOrAtMostOnce;
+  if (retries + failovers >= max_retries_) {
+    return RetryAction::fail(
+        "Failovers and retries(" + std::to_string(retries + failovers) +
+        ") exceeded maximum retries (" + std::to_string(max_retries_) + "), Status: " +
+        s.ToString());
+  } else {
+    return RetryAction::retry(delay_);
+  }
+}
+
+
+RetryAction NoRetryPolicy::ShouldRetry(
+    const Status &s, uint64_t retries, uint64_t failovers,
+    bool isIdempotentOrAtMostOnce) const {
+  LOG_TRACE(kRPC, << "NoRetryPolicy::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
+  (void)retries;
+  (void)failovers;
+  (void)isIdempotentOrAtMostOnce;
+  return RetryAction::fail("No retry, Status: " + s.ToString());
+}
+
+
+RetryAction FixedDelayWithFailover::ShouldRetry(const Status &s, uint64_t retries,
+    uint64_t failovers,
+    bool isIdempotentOrAtMostOnce) const {
+  (void)isIdempotentOrAtMostOnce;
+  (void)max_failover_conn_retries_;
+  LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")");
+
+  if(failovers < max_failover_retries_ && (s.code() == ::asio::error::timed_out || s.get_server_exception_type() == Status::kStandbyException) )
+  {
+    // Try connecting to another NN in case this one keeps timing out
+    // Can add the backoff wait specified by dfs.client.failover.sleep.base.millis here
+    if(failovers == 0) {
+      // No delay on first failover if it looks like the NN was bad.
+      return RetryAction::failover(0);
+    } else {
+      return RetryAction::failover(delay_);
+    }
+  }
+
+  if(retries < max_retries_ && failovers < max_failover_retries_) {
+    LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries < max_retries_ && failovers < max_failover_retries_");
+    return RetryAction::retry(delay_);
+  } else if (retries >= max_retries_ && failovers < max_failover_retries_) {
+    LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries >= max_retries_ && failovers < max_failover_retries_");
+    return RetryAction::failover(delay_);
+  } else if (retries <= max_retries_ && failovers == max_failover_retries_) {
+    LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries <= max_retries_ && failovers == max_failover_retries_");
+    // 1 last retry on new connection
+    return RetryAction::retry(delay_);
+  }
+
+  return RetryAction::fail("Retry and failover didn't work, Status: " + s.ToString());
+}
+
+}

+ 160 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.h

@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_RETRY_POLICY_H_
+#define LIB_COMMON_RETRY_POLICY_H_
+
+#include "common/util.h"
+
+#include <string>
+#include <stdint.h>
+
+namespace hdfs {
+
+class RetryAction {
+ public:
+  enum RetryDecision { FAIL, RETRY, FAILOVER_AND_RETRY };
+
+  RetryDecision action;
+  uint64_t delayMillis;
+  std::string reason;
+
+  RetryAction(RetryDecision in_action, uint64_t in_delayMillis,
+              const std::string &in_reason)
+      : action(in_action), delayMillis(in_delayMillis), reason(in_reason) {}
+
+  static RetryAction fail(const std::string &reason) {
+    return RetryAction(FAIL, 0, reason);
+  }
+  static RetryAction retry(uint64_t delay) {
+    return RetryAction(RETRY, delay, "");
+  }
+  static RetryAction failover(uint64_t delay) {
+    return RetryAction(FAILOVER_AND_RETRY, delay, "");
+  }
+
+  std::string decision_str() const {
+    switch(action) {
+      case FAIL: return "FAIL";
+      case RETRY: return "RETRY";
+      case FAILOVER_AND_RETRY: return "FAILOVER_AND_RETRY";
+      default: return "UNDEFINED ACTION";
+    }
+  };
+};
+
+class RetryPolicy {
+ protected:
+  uint64_t delay_;
+  uint64_t max_retries_;
+  RetryPolicy(uint64_t delay, uint64_t max_retries) :
+              delay_(delay), max_retries_(max_retries) {}
+
+ public:
+  RetryPolicy() {};
+
+  virtual ~RetryPolicy() {}
+  /*
+   * If there was an error in communications, responds with the configured
+   * action to take.
+   */
+  virtual RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                                            uint64_t failovers,
+                                            bool isIdempotentOrAtMostOnce) const = 0;
+
+  virtual std::string str() const { return "Base RetryPolicy"; }
+};
+
+
+/*
+ * Overview of how the failover retry policy works:
+ *
+ * 1) Acts the same as FixedDelayRetryPolicy in terms of connection retries against a single NN
+ *    with two differences:
+ *      a) If we have retried more than the maximum number of retries we will failover to the
+ *         other node and reset the retry counter rather than error out.  It will begin the same
+ *         routine on the other node.
+ *      b) If an attempted connection times out and max_failover_conn_retries_ is less than the
+ *         normal number of retries it will failover sooner.  The connection timeout retry limit
+ *         defaults to zero; the idea being that if a node is unresponsive it's better to just
+ *         try the secondary rather than incur the timeout cost multiple times.
+ *
+ * 2) Keeps track of the failover count in the same way that the retry count is tracked.  If failover
+ *    is triggered more than a set number (dfs.client.failover.max.attempts) of times then the operation
+ *    will error out in the same way that a non-HA operation would error if it ran out of retries.
+ *
+ * 3) Failover between namenodes isn't instantaneous so the RPC retry delay is reused to add a small
+ *    delay between failover attempts.  This helps prevent the client from quickly using up all of
+ *    its failover attempts while thrashing between namenodes that are both temporarily marked standby.
+ *    Note: The Java client implements exponential backoff here with a base other than the rpc delay,
+ *    and this policy will do the same in the future.  It currently does no exponential backoff;
+ *    the class can be renamed to ExponentialDelayWithFailover when backoff is implemented.
+ */
+class FixedDelayWithFailover : public RetryPolicy {
+ public:
+  FixedDelayWithFailover(uint64_t delay, uint64_t max_retries,
+                         uint64_t max_failover_retries,
+                         uint64_t max_failover_conn_retries)
+      : RetryPolicy(delay, max_retries), max_failover_retries_(max_failover_retries),
+        max_failover_conn_retries_(max_failover_conn_retries) {}
+
+  RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                          uint64_t failovers,
+                          bool isIdempotentOrAtMostOnce) const override;
+
+  std::string str() const override { return "FixedDelayWithFailover"; }
+
+ private:
+  // Attempts to fail over
+  uint64_t max_failover_retries_;
+  // Attempts to fail over if the connection times out rather than
+  // trying to connect and waiting for the timeout delay failover_retries_
+  // times.
+  uint64_t max_failover_conn_retries_;
+};
+
+
+/*
+ * Returns a fixed delay up to a certain number of retries
+ */
+class FixedDelayRetryPolicy : public RetryPolicy {
+ public:
+  FixedDelayRetryPolicy(uint64_t delay, uint64_t max_retries)
+      : RetryPolicy(delay, max_retries) {}
+
+  RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                          uint64_t failovers,
+                          bool isIdempotentOrAtMostOnce) const override;
+
+  std::string str() const override { return "FixedDelayRetryPolicy"; }
+};
+
+/*
+ * Never retries
+ */
+class NoRetryPolicy : public RetryPolicy {
+ public:
+  NoRetryPolicy() {};
+  RetryAction ShouldRetry(const Status &s, uint64_t retries,
+                          uint64_t failovers,
+                          bool isIdempotentOrAtMostOnce) const override;
+
+  std::string str() const override { return "NoRetryPolicy"; }
+};
+}
+
+#endif
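
A sketch of how an RPC engine might consult the failover policy described above; the constructor arguments mirror the (delay, max_retries, max_failover_retries, max_failover_conn_retries) parameters:

    #include "common/retry_policy.h"
    #include "hdfspp/status.h"

    void example_retry_decision(const hdfs::Status &last_error) {
      // 500ms between attempts, 3 retries per NN, 15 failovers,
      // fail over immediately on a connection timeout (0 connection retries).
      hdfs::FixedDelayWithFailover policy(500, 3, 15, 0);

      uint64_t retries = 0;
      uint64_t failovers = 0;
      hdfs::RetryAction action =
          policy.ShouldRetry(last_error, retries, failovers,
                             true /* isIdempotentOrAtMostOnce */);

      switch (action.action) {
        case hdfs::RetryAction::RETRY:              break;  // wait action.delayMillis, retry same NN
        case hdfs::RetryAction::FAILOVER_AND_RETRY: break;  // switch namenodes, then retry
        case hdfs::RetryAction::FAIL:               break;  // give up; action.reason explains why
      }
    }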

+ 66 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_authenticator.h

@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef LIB_COMMON_SASL_AUTHENTICATOR_H_
+#define LIB_COMMON_SASL_AUTHENTICATOR_H_
+
+#include "hdfspp/status.h"
+
+namespace hdfs {
+
+class DigestMD5AuthenticatorTest_TestResponse_Test;
+
+/**
+ * A specialized implementation of RFC 2831 for the HDFS
+ * DataTransferProtocol.
+ *
+ * The current implementation lacks the following features:
+ *   * Encoding the username, realm, and password in ISO-8859-1 when
+ * it is required by the RFC. They are always encoded in UTF-8.
+ *   * Checking whether the challenges from the server are
+ * well-formed.
+ *   * Specifying authzid, digest-uri and maximum buffer size.
+ *   * Supporting QOP other than the auth level.
+ **/
+class DigestMD5Authenticator {
+public:
+  Status EvaluateResponse(const std::string &payload, std::string *result);
+  DigestMD5Authenticator(const std::string &username,
+                         const std::string &password, bool mock_nonce = false);
+
+private:
+  Status GenerateFirstResponse(std::string *result);
+  Status GenerateResponseValue(std::string *response_value);
+  Status ParseFirstChallenge(const std::string &payload);
+
+  static size_t NextToken(const std::string &payload, size_t off,
+                          std::string *tok);
+  void GenerateCNonce();
+  std::string username_;
+  std::string password_;
+  std::string nonce_;
+  std::string cnonce_;
+  std::string realm_;
+  std::string qop_;
+  unsigned nonce_count_;
+
+  const bool TEST_mock_cnonce_;
+  friend class DigestMD5AuthenticatorTest_TestResponse_Test;
+};
+}
+
+#endif
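
A sketch of a single challenge/response step with the class above; in practice the challenge bytes arrive from the DataNode during the DataTransferProtocol SASL handshake, and the credentials here are placeholders:

    #include "common/sasl_authenticator.h"

    #include <string>

    hdfs::Status example_sasl_step(const std::string &server_challenge,
                                   std::string *response) {
      hdfs::DigestMD5Authenticator auth("example_user", "example_password");
      // Parses the first challenge and writes the digest-md5 reply into *response.
      return auth.EvaluateResponse(server_challenge, response);
    }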

+ 240 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/sasl_digest_md5.cc

@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "sasl_authenticator.h"
+
+#include "common/util.h"
+
+#include <openssl/rand.h>
+#include <openssl/md5.h>
+
+#include <iomanip>
+#include <map>
+#include <sstream>
+
+namespace hdfs {
+
+static std::string QuoteString(const std::string &src);
+static std::string GetMD5Digest(const std::string &src);
+static std::string BinaryToHex(const std::string &src);
+
+static const char kDigestUri[] = "hdfs/0";
+static const size_t kMaxBufferSize = 65536;
+
+DigestMD5Authenticator::DigestMD5Authenticator(const std::string &username,
+                                               const std::string &password,
+                                               bool mock_nonce)
+    : username_(username), password_(password), nonce_count_(0),
+      TEST_mock_cnonce_(mock_nonce) {}
+
+Status DigestMD5Authenticator::EvaluateResponse(const std::string &payload,
+                                                std::string *result) {
+  Status status = ParseFirstChallenge(payload);
+  if (status.ok()) {
+    status = GenerateFirstResponse(result);
+  }
+  return status;
+}
+
+size_t DigestMD5Authenticator::NextToken(const std::string &payload, size_t off,
+                                         std::string *tok) {
+  tok->clear();
+  if (off >= payload.size()) {
+    return std::string::npos;
+  }
+
+  char c = payload[off];
+  if (c == '=' || c == ',') {
+    *tok = c;
+    return off + 1;
+  }
+
+  int quote_count = 0;
+  for (; off < payload.size(); ++off) {
+    char c = payload[off];
+    if (c == '"') {
+      ++quote_count;
+      if (quote_count == 2) {
+        return off + 1;
+      }
+      continue;
+    }
+
+    if (c == '=') {
+      if (quote_count) {
+        tok->append(&c, 1);
+      } else {
+        break;
+      }
+    } else if (('0' <= c && c <= '9') || ('a' <= c && c <= 'z') ||
+               ('A' <= c && c <= 'Z') || c == '+' || c == '/' || c == '-' ||
+               c == '_' || c == '@') {
+      tok->append(&c, 1);
+    } else {
+      break;
+    }
+  }
+  return off;
+}
+
+void DigestMD5Authenticator::GenerateCNonce() {
+  if (!TEST_mock_cnonce_) {
+    char buf[8] = {0,};
+    RAND_pseudo_bytes(reinterpret_cast<unsigned char *>(buf), sizeof(buf));
+    cnonce_ = Base64Encode(std::string(buf, sizeof(buf)));
+  }
+}
+
+Status DigestMD5Authenticator::ParseFirstChallenge(const std::string &payload) {
+  std::map<std::string, std::string> props;
+  std::string token;
+  enum {
+    kStateLVal,
+    kStateEqual,
+    kStateRVal,
+    kStateCommaOrEnd,
+  };
+
+  int state = kStateLVal;
+
+  std::string lval, rval;
+  size_t off = 0;
+  while (true) {
+    off = NextToken(payload, off, &token);
+    if (off == std::string::npos) {
+      break;
+    }
+
+    switch (state) {
+    case kStateLVal:
+      lval = token;
+      state = kStateEqual;
+      break;
+    case kStateEqual:
+      state = kStateRVal;
+      break;
+    case kStateRVal:
+      rval = token;
+      props[lval] = rval;
+      state = kStateCommaOrEnd;
+      break;
+    case kStateCommaOrEnd:
+      state = kStateLVal;
+      break;
+    }
+  }
+
+  if (props["algorithm"] != "md5-sess" || props["charset"] != "utf-8" ||
+      props.find("nonce") == props.end()) {
+    return Status::Error("Invalid challenge");
+  }
+  realm_ = props["realm"];
+  nonce_ = props["nonce"];
+  qop_ = props["qop"];
+  return Status::OK();
+}
+
+Status DigestMD5Authenticator::GenerateFirstResponse(std::string *result) {
+  // TODO: Support auth-int and auth-conf
+  // Handle cipher
+  if (qop_ != "auth") {
+    return Status::Unimplemented();
+  }
+
+  std::stringstream ss;
+  GenerateCNonce();
+  ss << "charset=utf-8,username=\"" << QuoteString(username_) << "\""
+     << ",authzid=\"" << QuoteString(username_) << "\""
+     << ",nonce=\"" << QuoteString(nonce_) << "\""
+     << ",digest-uri=\"" << kDigestUri << "\""
+     << ",maxbuf=" << kMaxBufferSize << ",cnonce=\"" << cnonce_ << "\"";
+
+  if (realm_.size()) {
+    ss << ",realm=\"" << QuoteString(realm_) << "\"";
+  }
+
+  ss << ",nc=" << std::hex << std::setw(8) << std::setfill('0')
+     << ++nonce_count_;
+  std::string response_value;
+  GenerateResponseValue(&response_value);
+  ss << ",response=" << response_value;
+  *result = ss.str();
+  return result->size() > 4096 ? Status::Error("Response too big")
+                               : Status::OK();
+}
+
+/**
+ * Generate the response value specified in S 2.1.2.1 in RFC2831.
+ **/
+Status
+DigestMD5Authenticator::GenerateResponseValue(std::string *response_value) {
+  std::stringstream begin_a1, a1_ss;
+  std::string a1, a2;
+
+  if (qop_ == "auth") {
+    a2 = std::string("AUTHENTICATE:") + kDigestUri;
+  } else {
+    a2 = std::string("AUTHENTICATE:") + kDigestUri +
+         ":00000000000000000000000000000000";
+  }
+
+  begin_a1 << username_ << ":" << realm_ << ":" << password_;
+  a1_ss << GetMD5Digest(begin_a1.str()) << ":" << nonce_ << ":" << cnonce_
+        << ":" << username_;
+
+  std::stringstream combine_ss;
+  combine_ss << BinaryToHex(GetMD5Digest(a1_ss.str())) << ":" << nonce_ << ":"
+             << std::hex << std::setw(8) << std::setfill('0') << nonce_count_
+             << ":" << cnonce_ << ":" << qop_ << ":"
+             << BinaryToHex(GetMD5Digest(a2));
+  *response_value = BinaryToHex(GetMD5Digest(combine_ss.str()));
+  return Status::OK();
+}
+
+static std::string QuoteString(const std::string &src) {
+  std::string dst;
+  dst.resize(2 * src.size());
+  size_t j = 0;
+  for (size_t i = 0; i < src.size(); ++i) {
+    if (src[i] == '"') {
+      dst[j++] = '\\';
+    }
+    dst[j++] = src[i];
+  }
+  dst.resize(j);
+  return dst;
+}
+
+static std::string GetMD5Digest(const std::string &src) {
+  MD5_CTX ctx;
+  unsigned long long res[2];
+  MD5_Init(&ctx);
+  MD5_Update(&ctx, src.c_str(), src.size());
+  MD5_Final(reinterpret_cast<unsigned char *>(res), &ctx);
+  return std::string(reinterpret_cast<char *>(res), sizeof(res));
+}
+
+static std::string BinaryToHex(const std::string &src) {
+  std::stringstream ss;
+  ss << std::hex << std::setfill('0');
+  for (size_t i = 0; i < src.size(); ++i) {
+    unsigned c = (unsigned)(static_cast<unsigned char>(src[i]));
+    ss << std::setw(2) << c;
+  }
+  return ss.str();
+}
+}

+ 74 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/statinfo.cc

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hdfspp/statinfo.h>
+#include <sys/stat.h>
+#include <sstream>
+#include <iomanip>
+
+namespace hdfs {
+
+StatInfo::StatInfo()
+  : file_type(0),
+    length(0),
+    permissions(0),
+    modification_time(0),
+    access_time(0),
+    block_replication(0),
+    blocksize(0),
+    fileid(0),
+    children_num(0) {
+}
+
+std::string StatInfo::str() const {
+  char perms[11];
+  perms[0] = file_type == StatInfo::IS_DIR ? 'd' : '-';
+  perms[1] = permissions & S_IRUSR? 'r' : '-';
+  perms[2] = permissions & S_IWUSR? 'w': '-';
+  perms[3] = permissions & S_IXUSR? 'x': '-';
+  perms[4] = permissions & S_IRGRP? 'r' : '-';
+  perms[5] = permissions & S_IWGRP? 'w': '-';
+  perms[6] = permissions & S_IXGRP? 'x': '-';
+  perms[7] = permissions & S_IROTH? 'r' : '-';
+  perms[8] = permissions & S_IWOTH? 'w': '-';
+  perms[9] = permissions & S_IXOTH? 'x': '-';
+  perms[10] = 0;
+
+  //Convert to seconds from milliseconds
+  const int time_field_length = 17;
+  time_t rawtime = modification_time/1000;
+  struct tm * timeinfo;
+  char buffer[time_field_length];
+  timeinfo = localtime(&rawtime);
+
+  strftime(buffer,time_field_length,"%Y-%m-%d %H:%M",timeinfo);
+  buffer[time_field_length-1] = 0;  //null terminator
+  std::string time(buffer);
+
+  std::stringstream ss;
+  ss  << std::left << std::setw(12) << perms
+      << std::left << std::setw(3) << (!block_replication ? "-" : std::to_string(block_replication))
+      << std::left << std::setw(15) << owner
+      << std::left << std::setw(15) << group
+      << std::right << std::setw(5) << length
+      << std::right << std::setw(time_field_length + 2) << time//modification_time
+      << "  " << full_path;
+  return ss.str();
+}
+
+}
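
A sketch of the ls-style line produced by str() above; the field names follow their use in str() and hdfspp/statinfo.h, and the values are illustrative:

    #include <hdfspp/statinfo.h>

    #include <iostream>

    void example_statinfo() {
      hdfs::StatInfo si;
      si.file_type = hdfs::StatInfo::IS_DIR;
      si.permissions = 0755;
      si.block_replication = 0;                 // directories print "-" in that column
      si.owner = "hdfs";
      si.group = "supergroup";
      si.modification_time = 1500000000000ULL;  // milliseconds since the epoch
      si.full_path = "/user/hdfs";
      std::cout << si.str() << std::endl;       // e.g. "drwxr-xr-x  -  hdfs  supergroup ... /user/hdfs"
    }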

+ 192 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/status.cc

@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hdfspp/status.h"
+
+#include <cassert>
+#include <sstream>
+#include <cstring>
+#include <map>
+#include <set>
+
+namespace hdfs {
+
+//  Server side exceptions that we capture from the RpcResponseHeaderProto
+const char * kStatusAccessControlException     = "org.apache.hadoop.security.AccessControlException";
+const char * kPathIsNotDirectoryException      = "org.apache.hadoop.fs.PathIsNotDirectoryException";
+const char * kSnapshotException                = "org.apache.hadoop.hdfs.protocol.SnapshotException";
+const char * kStatusStandbyException           = "org.apache.hadoop.ipc.StandbyException";
+const char * kStatusSaslException              = "javax.security.sasl.SaslException";
+const char * kPathNotFoundException            = "org.apache.hadoop.fs.InvalidPathException";
+const char * kPathNotFoundException2           = "java.io.FileNotFoundException";
+const char * kFileAlreadyExistsException       = "org.apache.hadoop.fs.FileAlreadyExistsException";
+const char * kPathIsNotEmptyDirectoryException = "org.apache.hadoop.fs.PathIsNotEmptyDirectoryException";
+
+
+const static std::map<std::string, int> kKnownServerExceptionClasses = {
+                                            {kStatusAccessControlException, Status::kAccessControlException},
+                                            {kPathIsNotDirectoryException, Status::kNotADirectory},
+                                            {kSnapshotException, Status::kSnapshotProtocolException},
+                                            {kStatusStandbyException, Status::kStandbyException},
+                                            {kStatusSaslException, Status::kAuthenticationFailed},
+                                            {kPathNotFoundException, Status::kPathNotFound},
+                                            {kPathNotFoundException2, Status::kPathNotFound},
+                                            {kFileAlreadyExistsException, Status::kFileAlreadyExists},
+                                            {kPathIsNotEmptyDirectoryException, Status::kPathIsNotEmptyDirectory}
+                                        };
+
+// Errors that retry cannot fix. TODO: complete the list.
+const static std::set<int> noRetryExceptions = {
+  Status::kPermissionDenied,
+  Status::kAuthenticationFailed,
+  Status::kAccessControlException
+};
+
+Status::Status(int code, const char *msg1)
+               : code_(code) {
+  if(msg1) {
+    msg_ = msg1;
+  }
+}
+
+Status::Status(int code, const char *exception_class_name, const char *exception_details)
+               : code_(code) {
+  // If we can assure this never gets nullptr args this can be
+  // in the initializer list.
+  if(exception_class_name)
+    exception_class_ = exception_class_name;
+  if(exception_details)
+    msg_ = exception_details;
+
+  std::map<std::string, int>::const_iterator it = kKnownServerExceptionClasses.find(exception_class_);
+  if(it != kKnownServerExceptionClasses.end()) {
+    code_ = it->second;
+  }
+}
+
+
+Status Status::OK() {
+  return Status();
+}
+
+Status Status::InvalidArgument(const char *msg) {
+  return Status(kInvalidArgument, msg);
+}
+
+Status Status::PathNotFound(const char *msg){
+  return Status(kPathNotFound, msg);
+}
+
+Status Status::ResourceUnavailable(const char *msg) {
+  return Status(kResourceUnavailable, msg);
+}
+
+Status Status::PathIsNotDirectory(const char *msg) {
+  return Status(kNotADirectory, msg);
+}
+
+Status Status::Unimplemented() {
+  return Status(kUnimplemented, "");
+}
+
+Status Status::Exception(const char *exception_class_name, const char *error_message) {
+  // Server side exception but can be represented by std::errc codes
+  if (exception_class_name && (strcmp(exception_class_name, kStatusAccessControlException) == 0) )
+    return Status(kPermissionDenied, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kStatusSaslException) == 0))
+    return AuthenticationFailed();
+  else if (exception_class_name && (strcmp(exception_class_name, kPathNotFoundException) == 0))
+    return Status(kPathNotFound, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kPathNotFoundException2) == 0))
+    return Status(kPathNotFound, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kPathIsNotDirectoryException) == 0))
+    return Status(kNotADirectory, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kSnapshotException) == 0))
+    return Status(kInvalidArgument, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kFileAlreadyExistsException) == 0))
+    return Status(kFileAlreadyExists, error_message);
+  else if (exception_class_name && (strcmp(exception_class_name, kPathIsNotEmptyDirectoryException) == 0))
+    return Status(kPathIsNotEmptyDirectory, error_message);
+  else
+    return Status(kException, exception_class_name, error_message);
+}
+
+Status Status::Error(const char *error_message) {
+  return Exception("Exception", error_message);
+}
+
+Status Status::AuthenticationFailed() {
+  return Status::AuthenticationFailed(nullptr);
+}
+
+Status Status::AuthenticationFailed(const char *msg) {
+  std::string formatted = "AuthenticationFailed";
+  if(msg) {
+    formatted += ": ";
+    formatted += msg;
+  }
+  return Status(kAuthenticationFailed, formatted.c_str());
+}
+
+Status Status::AuthorizationFailed() {
+  return Status::AuthorizationFailed(nullptr);
+}
+
+Status Status::AuthorizationFailed(const char *msg) {
+  std::string formatted = "AuthorizationFailed";
+  if(msg) {
+    formatted += ": ";
+    formatted += msg;
+  }
+  return Status(kPermissionDenied, formatted.c_str());
+}
+
+Status Status::Canceled() {
+  return Status(kOperationCanceled, "Operation canceled");
+}
+
+Status Status::InvalidOffset(const char *msg){
+  return Status(kInvalidOffset, msg);
+}
+
+std::string Status::ToString() const {
+  if (code_ == kOk) {
+    return "OK";
+  }
+  std::stringstream ss;
+  if(!exception_class_.empty()) {
+    ss << exception_class_ << ":";
+  }
+  ss << msg_;
+  return ss.str();
+}
+
+bool Status::notWorthRetry() const {
+  return noRetryExceptions.find(code_) != noRetryExceptions.end();
+}
+
+Status Status::MutexError(const char *msg) {
+  std::string formatted = "MutexError";
+  if(msg) {
+    formatted += ": ";
+    formatted += msg;
+  }
+  return Status(kBusy/*try_lock failure errno*/, formatted.c_str());
+}
+
+}
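
A minimal usage sketch of the status mapping above (assuming the public hdfspp/status.h header; the exception class string and detail message are illustrative only, not taken from this patch):

    #include <hdfspp/status.h>
    #include <iostream>

    int main() {
      // Map a server-side exception class name, as carried in RpcResponseHeaderProto,
      // onto a client-visible status code.
      hdfs::Status s = hdfs::Status::Exception(
          "org.apache.hadoop.security.AccessControlException",
          "Permission denied (illustrative message)");
      std::cout << s.ToString() << std::endl;                        // prints the detail message
      std::cout << std::boolalpha << s.notWorthRetry() << std::endl; // true: permission errors are not retried
      return 0;
    }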

+ 454 - 0
hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/uri.cc

@@ -0,0 +1,454 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <hdfspp/uri.h>
+
+#include <uriparser2/uriparser/Uri.h>
+
+#include <string.h>
+#include <sstream>
+#include <cstdlib>
+#include <cassert>
+#include <limits>
+
+namespace hdfs
+{
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//   Internal utilities
+//
+///////////////////////////////////////////////////////////////////////////////
+
+const char kReserved[] = ":/?#[]@%+";
+
+std::string URI::encode(const std::string & decoded)
+{
+  bool hasCharactersToEncode = false;
+  for (auto c : decoded)
+  {
+    if (isalnum(c) || (strchr(kReserved, c) == NULL))
+    {
+      continue;
+    }
+    else
+    {
+      hasCharactersToEncode = true;
+      break;
+    }
+  }
+
+  if (hasCharactersToEncode)
+  {
+    std::vector<char> buf(decoded.size() * 3 + 1);
+    uriEscapeA(decoded.c_str(), &buf[0], true, URI_BR_DONT_TOUCH);
+    return std::string(&buf[0]);
+  }
+  else
+  {
+    return decoded;
+  }
+}
+
+std::string URI::decode(const std::string & encoded)
+{
+  bool hasCharactersToDecode = false;
+  for (auto c : encoded)
+  {
+    if (c == '%' || c == '+')
+    {
+      hasCharactersToDecode = true;
+      break;
+    }
+  }
+
+  if (hasCharactersToDecode)
+  {
+    std::vector<char> buf(encoded.size() + 1);
+    strncpy(&buf[0], encoded.c_str(), buf.size());
+    uriUnescapeInPlaceExA(&buf[0], true, URI_BR_DONT_TOUCH);
+    return std::string(&buf[0]);
+  }
+  else
+  {
+    return encoded;
+  }
+}
+
+std::vector<std::string> split(const std::string input, char separator)
+{
+  std::vector<std::string> result;
+
+  if (!input.empty())
+  {
+    const char * remaining = input.c_str();
+    if (*remaining == '/')
+      remaining++;
+
+    const char * next_end = strchr(remaining, separator);
+    while (next_end) {
+      int len = next_end - remaining;
+      if (len)
+        result.push_back(std::string(remaining, len));
+      else
+        result.push_back("");
+      remaining = next_end + 1;
+      next_end = strchr(remaining, separator);
+    }
+    result.push_back(std::string(remaining));
+  }
+
+  return result;
+}
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//   Parsing
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+
+std::string copy_range(const UriTextRangeA *r) {
+  const int size = r->afterLast - r->first;
+  if (size) {
+      return std::string(r->first, size);
+  }
+  return "";
+}
+
+bool parse_int(const UriTextRangeA *r, int32_t& result)
+{
+  std::string int_str = copy_range(r);
+  if(!int_str.empty()) {
+    errno = 0;
+    unsigned long val = ::strtoul(int_str.c_str(), nullptr, 10);
+    if(errno == 0 && val <= std::numeric_limits<uint16_t>::max()) {
+      result = val;
+      return true;
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+
+std::vector<std::string> copy_path(const UriPathSegmentA *ps) {
+  std::vector<std::string> result;
+  if (nullptr == ps)
+    return result;
+
+  for (; ps != nullptr; ps = ps->next) {
+    result.push_back(copy_range(&ps->text));
+  }
+
+  return result;
+}
+
+void parse_user_info(const UriTextRangeA *r, std::string * user, std::string * pass) {
+  // Output parameters
+  assert(user);
+  assert(pass);
+
+  std::string user_and_password = copy_range(r);
+  if (!user_and_password.empty()) {
+    const char * begin = user_and_password.c_str();
+    const char * colon_loc = strchr(begin, ':');
+    if (colon_loc) {
+      // Everything before the ':' is the user; everything after it is the password.
+      *user = std::string(begin, colon_loc - begin);
+      *pass = colon_loc + 1;
+    } else {
+      *user = user_and_password;
+    }
+  }
+}
+
+
+std::vector<URI::Query> parse_queries(const char *first, const char * afterLast) {
+  std::vector<URI::Query> result;
+  UriQueryListA * query;
+  int count;
+  int dissect_result = uriDissectQueryMallocExA(&query, &count, first, afterLast, false, URI_BR_DONT_TOUCH);
+  if (URI_SUCCESS == dissect_result) {
+    for (auto ps = query; ps != nullptr; ps = ps->next) {
+      std::string key = ps->key ? URI::encode(ps->key) : "";
+      std::string value = ps->value ? URI::encode(ps->value) : "";
+      result.emplace_back(key, value);
+    }
+    uriFreeQueryListA(query);
+  }
+
+  return result;
+}
+
+// Parse a string into a URI.  Throws an hdfs::uri_parse_error if the URI is malformed.
+URI URI::parse_from_string(const std::string &str)
+{
+  URI ret;
+  bool ok = true;
+
+  UriParserStateA state;
+  memset(&state, 0, sizeof(state));
+  UriUriA uu;
+
+  state.uri = &uu;
+  int parseResult = uriParseUriA(&state, str.c_str());
+  ok &= (parseResult == URI_SUCCESS);
+
+  if (ok) {
+    ret.scheme = copy_range(&uu.scheme);
+    ret.host = copy_range(&uu.hostText);
+    ok &= parse_int(&uu.portText, ret._port);
+    ret.path = copy_path(uu.pathHead);
+    ret.queries = parse_queries(uu.query.first, uu.query.afterLast);
+    ret.fragment = copy_range(&uu.fragment);
+    parse_user_info(&uu.userInfo, &ret.user, &ret.pass);
+  }
+  // Free parser state exactly once, whether or not parsing succeeded.
+  uriFreeUriMembersA(&uu);
+
+  if (ok) {
+    return ret;
+  } else {
+    throw uri_parse_error(str);
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//   Getters and setters
+//
+///////////////////////////////////////////////////////////////////////////////
+
+URI::URI() : _port(-1) {}
+
+URI::Query::Query(const std::string& k, const std::string& v) : key(k), value(v) {}
+
+std::string URI::str(bool encoded_output) const
+{
+  std::stringstream ss;
+  if (!scheme.empty()) ss << from_encoded(encoded_output, scheme) << "://";
+  if (!user.empty() || !pass.empty()) {
+    if (!user.empty()) ss << from_encoded(encoded_output, user);
+    if (!pass.empty()) ss << ":" << from_encoded(encoded_output, pass);
+    ss << "@";
+  }
+  if (has_authority()) ss << build_authority(encoded_output);
+  if (!path.empty()) ss << get_path(encoded_output);
+  if (!queries.empty()) ss << "?" << get_query(encoded_output);
+  if (!fragment.empty()) ss << "#" << from_encoded(encoded_output, fragment);
+
+  return ss.str();
+}
+
+bool URI::has_authority() const
+{
+  return (!host.empty()) || (has_port());
+}
+
+std::string URI::build_authority(bool encoded_output) const
+{
+  std::stringstream ss;
+  ss << URI::from_encoded(encoded_output, host);
+  if (has_port())
+  {
+    ss << ":" << _port;
+  }
+  return ss.str();
+}
+
+std::string URI::get_scheme(bool encoded_output) const {
+  return from_encoded(encoded_output,scheme);
+}
+
+void URI::set_scheme(const std::string &s, bool encoded_input) {
+  scheme = to_encoded(encoded_input,s);
+}
+
+std::string URI::get_host(bool encoded_output) const {
+  return from_encoded(encoded_output,host);
+}
+
+void URI::set_host(const std::string& h, bool encoded_input) {
+  host = to_encoded(encoded_input,h);
+}
+
+bool URI::has_port() const {
+  return _port != -1;
+}
+
+uint16_t URI::get_port() const {
+  return (uint16_t)_port;
+}
+
+uint16_t URI::get_port_or_default(uint16_t val) const {
+  return has_port() ? (uint16_t)_port : val;
+}
+
+void URI::set_port(uint16_t p)
+{
+  _port = (int32_t)p & 0xFFFF;
+}
+
+void URI::clear_port()
+{
+  _port = -1;
+}
+
+std::string URI::get_path(bool encoded_output) const
+{
+  std::ostringstream out;
+  for (const std::string& s: path) {
+    out << "/" << from_encoded(encoded_output, s);
+  }
+  return out.str();
+}
+
+std::vector<std::string> URI::get_path_elements(bool encoded_output) const
+{
+  std::vector<std::string> result;
+  for (const std::string& path_elem: path) {
+    result.push_back(from_encoded(encoded_output, path_elem));
+  }
+
+  return result;
+}
+
+void URI::parse_path(bool input_encoded, const std::string &input_path)
+{
+  std::vector<std::string> split_path = split(input_path, '/');
+  for (const std::string& s: split_path) {
+    path.push_back(to_encoded(input_encoded, s));
+  }
+}
+
+// Mostly copied and modified from uriparser2.c
+
+void URI::set_path(const std::string &p, bool encoded_input) {
+  parse_path(encoded_input, p);
+}
+
+void URI::add_path(const std::string &p, bool encoded_input)
+{
+  path.push_back(to_encoded(encoded_input, p));
+}
+
+std::string URI::get_query(bool encoded_output) const {
+  bool first = true;
+  std::stringstream ss;
+  for (const Query& q: queries) {
+    if (!first) {
+      ss << "&";
+    }
+    ss << from_encoded(encoded_output, q.key) << "=" << from_encoded(encoded_output, q.value);
+    first = false;
+  }
+
+  return ss.str();
+}
+
+std::vector<URI::Query> URI::get_query_elements(bool encoded_output) const
+{
+  std::vector<Query> result;
+  for (const Query& q: queries) {
+    std::string key = from_encoded(encoded_output, q.key);
+    std::string value = from_encoded(encoded_output, q.value);
+    result.emplace_back(key, value);
+  }
+
+  return result;
+}
+
+void URI::set_query(const std::string &q) {
+  // afterLast points one past the final character of the query string.
+  queries = parse_queries(q.c_str(), q.c_str() + q.size());
+}
+
+
+void URI::add_query(const std::string &name, const std::string & value, bool encoded_input)
+{
+  queries.emplace_back(to_encoded(encoded_input, name), to_encoded(encoded_input, value));
+}
+
+void URI::remove_query(const std::string &q_name, bool encoded_input)
+{
+  if (queries.empty())
+    return;
+
+  // This is the one place we need to do decoded comparisons
+  std::string decoded_key = encoded_input ? decode(q_name) : q_name;
+
+  for (int i = queries.size() - 1; i >= 0; i--) {
+    if (decode(queries[i].key) == decoded_key) {
+      queries.erase(queries.begin() + i);
+    }
+  }
+}
+
+std::string URI::get_fragment(bool encoded_output) const {
+  return from_encoded(encoded_output, fragment);
+}
+
+void URI::set_fragment(const std::string &f, bool encoded_input) {
+  fragment = to_encoded(encoded_input,f);
+}
+
+std::string URI::from_encoded(bool encoded_output, const std::string & input) {
+  return encoded_output ? input : decode(input);
+}
+
+std::string URI::to_encoded(bool encoded_input, const std::string & input) {
+  return encoded_input ? input : encode(input);
+}
+
+std::string URI::GetDebugString() const {
+  std::stringstream ss;
+  ss << std::endl;
+  ss << "\t" << "uri.str() = \"" << str() << "\"" << std::endl;
+  ss << "\t" << "uri.get_scheme() = \"" << get_scheme() << "\"" << std::endl;
+  ss << "\t" << "uri.get_host() = \"" << get_host() << "\"" << std::endl;
+
+  if(_port == -1)
+    ss << "\t" << "uri.get_port() = invalid (uninitialized)" << std::endl;
+  else
+    ss << "\t" << "uri.get_port() = \"" << _port << "\"" << std::endl;
+
+  ss << "\t" << "uri.get_path() = \"" << get_path() << "\"" << std::endl;
+  ss << "\t" << "uri.get_fragment() = \"" << get_fragment() << "\"" << std::endl;
+
+
+  std::vector<Query> query_elems = get_query_elements();
+
+  if(query_elems.size() > 0)
+    ss << "\t" << "Query elements:" << std::endl;
+
+  for(auto qry = query_elems.begin(); qry != query_elems.end(); qry++) {
+    ss << "\t\t" << qry->key << " -> " << qry->value << std::endl;
+  }
+
+  return ss.str();
+}
+
+} // end namespace hdfs
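
A rough usage sketch for the URI class above (the include path, host name, and sample URI are assumptions for illustration; parse_from_string throws hdfs::uri_parse_error on malformed input):

    #include <hdfspp/uri.h>
    #include <iostream>

    int main() {
      try {
        hdfs::URI u = hdfs::URI::parse_from_string(
            "hdfs://nn.example.com:8020/user/alice/data?op=LISTSTATUS");
        std::cout << u.get_scheme() << std::endl;               // "hdfs"
        std::cout << u.get_host() << std::endl;                 // "nn.example.com"
        std::cout << u.get_port_or_default(8020) << std::endl;  // 8020
        std::cout << u.get_path() << std::endl;                 // "/user/alice/data"
        std::cout << u.GetDebugString();                        // multi-line dump, including query elements
      } catch (const hdfs::uri_parse_error&) {
        std::cerr << "malformed URI" << std::endl;
        return 1;
      }
      return 0;
    }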

Some files were not shown because too many files changed in this diff