Selaa lähdekoodia

HADOOP-10818. native client: refactor URI code to be clearer (cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HADOOP-10388@1612631 13f79535-47bb-0310-9956-ffa450edef68
Colin McCabe 10 vuotta sitten
vanhempi
commit
0056de1188

+ 3 - 2
hadoop-native-core/src/main/native/CMakeLists.txt

@@ -154,8 +154,9 @@ add_executable(varint-unit rpc/varint-unit.c
 add_utest(varint-unit)
 
 add_executable(string-unit common/string-unit.c
-    common/string.c test/test.c)
+    common/hadoop_err.c common/string.c test/test.c)
 add_utest(string-unit)
+target_link_libraries(string-unit ${LIBUV_LIB})
 
 add_executable(htable-unit common/htable-unit.c
     common/htable.c test/test.c)
@@ -177,7 +178,7 @@ add_utest(hadoop_err-unit)
 target_link_libraries(hadoop_err-unit ${LIBUV_LIB})
 
 add_executable(uri-unit common/uri-unit.c
-    common/uri.c common/hadoop_err.c test/test.c)
+    common/uri.c common/hadoop_err.c common/string.c test/test.c)
 add_utest(uri-unit)
 target_link_libraries(uri-unit ${URIPARSER_LIB} ${LIBUV_LIB})
 

+ 26 - 0
hadoop-native-core/src/main/native/common/string.c

@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 
+#include "common/hadoop_err.h"
 #include "common/string.h"
 
 #include <errno.h>
@@ -65,4 +66,29 @@ int strdupto(char **dst, const char *src)
     return 0;
 }
 
+struct hadoop_err *vdynprintf(char **out, const char *fmt, va_list ap)
+{
+    char *str;
+
+    if (vasprintf(&str, fmt, ap) < 0) {
+        return hadoop_lerr_alloc(ENOMEM, "vdynprintf: OOM");
+    }
+    if (*out) {
+        free(*out);
+    }
+    *out = str;
+    return NULL;
+}
+
+struct hadoop_err *dynprintf(char **out, const char *fmt, ...)
+{
+    struct hadoop_err *err;
+    va_list ap;
+
+    va_start(ap, fmt);
+    err = vdynprintf(out, fmt, ap);
+    va_end(ap);
+    return err;
+}
+
 // vim: ts=4:sw=4:tw=79:et

+ 23 - 0
hadoop-native-core/src/main/native/common/string.h

@@ -47,6 +47,29 @@ void hex_buf_print(FILE *fp, const void *buf, int32_t buf_len,
  */
 int strdupto(char **dst, const char *src);
 
+/**
+ * Print to a dynamically allocated string.
+ *
+ * @param out       (out param) where to place the dynamically allocated
+ *                      string.  If *out is non-NULL, it will be freed.  *out
+ *                      may appear as an input parameter as well.
+ * @param fmt       printf-style format string.
+ * @param ap        printf-style arguments.
+ */
+struct hadoop_err *vdynprintf(char **out, const char *fmt, va_list ap);
+
+/**
+ * Print to a dynamically allocated string.
+ *
+ * @param out       (out param) where to place the dynamically allocated
+ *                      string.  If *out is non-NULL, it will be freed.  *out
+ *                      may appear as an input parameter as well.
+ * @param fmt       printf-style format string.
+ * @param ...       printf-style arguments.
+ */
+struct hadoop_err *dynprintf(char **out, const char *fmt, ...)
+        __attribute__((format(printf, 2, 3)));
+
 #endif
 
 // vim: ts=4:sw=4:tw=79:et

+ 12 - 45
hadoop-native-core/src/main/native/common/uri-unit.c

@@ -40,65 +40,32 @@ static int check_uri_params(const char *scheme, const char *escheme,
 }
 
 static struct hadoop_err *test_parse_uri(const char *uri_str,
-            const char *escheme, const char *euser, const char *eauth,
+            const char *escheme, const char *euser_info, const char *eauth,
             int eport, const char *epath)
 {
-    UriParserStateA base_uri_state, uri_state;
-    UriUriA base_uri, uri;
+    struct hadoop_uri *base = NULL, *uri = NULL;
     struct hadoop_err *err = NULL;
-    char *scheme = NULL, *user = NULL, *auth = NULL;
-    char *path = NULL;
-    uint16_t port;
 
-    memset(&uri_state, 0, sizeof(uri_state));
-    err = uri_parse_abs("hdfs:///home/cmccabe/", &base_uri_state,
-            &base_uri, "hdfs");
+    err = hadoop_uri_parse("hdfs:///home/cmccabe/", NULL, &base,
+                H_URI_APPEND_SLASH | H_URI_PARSE_ALL);
     if (err)
         goto done;
-    err = uri_parse(uri_str, &uri_state, &uri, &base_uri);
+    err = hadoop_uri_parse(uri_str, base, &uri, H_URI_PARSE_ALL);
     if (err)
         goto done;
-    err = uri_get_scheme(&uri, &scheme);
-    if (err)
-        goto done;
-    err = uri_get_user_info(&uri, &user);
-    if (err)
-        goto done;
-    // Get the authority, which we typically treat as a hostname.
-    err = uri_get_authority(&uri, &auth);
-    if (err)
-        goto done;
-    err = uri_get_path(&uri, &path);
-    if (err)
-        goto done;
-    err = uri_get_port(&uri, &port);
-    if (err)
-        goto done;
-//    fprintf(stderr, "test_parse_uri(%s): "
-//            "scheme=%s, user=%s, auth=%s, path=%s\n",
-//            uri_str, scheme, user, auth, path);
-    err = NULL;
-    if (check_uri_params(scheme, escheme,
-                         user, euser,
-                         auth, eauth,
-                        port, eport,
-                         path, epath)) {
+    if (check_uri_params(uri->scheme, escheme,
+                         uri->user_info, euser_info,
+                         uri->auth, eauth, uri->port, eport,
+                         uri->path, epath)) {
         err = hadoop_lerr_alloc(EINVAL, "check_uri_params: failed.");
         if (err)
             goto done;
     }
+    err = NULL;
 
 done:
-    if (base_uri_state.uri) {
-        uriFreeUriMembersA(&base_uri);
-    }
-    if (uri_state.uri) {
-        uriFreeUriMembersA(&uri);
-    }
-    free(scheme);
-    free(user);
-    free(auth);
-    free(path);
+    hadoop_uri_free(base);
+    hadoop_uri_free(uri);
     return err;
 }
 

+ 142 - 133
hadoop-native-core/src/main/native/common/uri.c

@@ -18,11 +18,13 @@
 
 #include "common/hadoop_err.h"
 #include "common/uri.h"
+#include "common/string.h"
 
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <uriparser/Uri.h>
 
 static struct hadoop_err *uri_err_to_hadoop_err(int err)
 {
@@ -56,115 +58,6 @@ static struct hadoop_err *uri_err_to_hadoop_err(int err)
     }
 }
 
-struct hadoop_err *uri_parse_abs(const char *str, UriParserStateA *state,
-            UriUriA *uri, const char *def_scheme)
-{
-    int ret;
-    struct hadoop_err *err = NULL;
-    size_t str_len;
-    const char *effective_str = NULL;
-    char *malloced_str = NULL, *nmalloced_str;
-
-    // If the URI doesn't end with a slash, append one.
-    // This is necessary to get AddBaseUri to act like we expect when using
-    // this absolute URI as a base.
-    state->uri = NULL;
-    str_len = strlen(str);
-    if ((str_len == 0) || (str[str_len - 1] != '/')) {
-        if (asprintf(&malloced_str, "%s/", str) < 0) {
-            err = hadoop_lerr_alloc(ENOMEM, "uri_parse_abs: OOM");
-            malloced_str = NULL;
-            goto done;
-        }
-        effective_str = malloced_str;
-    } else {
-        effective_str = str;
-    }
-    state->uri = uri;
-    ret = uriParseUriA(state, effective_str);
-    if (ret) {
-        state->uri = NULL;
-        err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
-            0, "uri_parse: failed to parse '%s' as URI",
-            effective_str);
-        goto done;
-    }
-    if (uri->scheme.first == NULL) {
-        // If the URI doesn't have a scheme, prepend the default one to the
-        // string, and re-parse.  This is necessary because AddBaseUri refuses
-        // to rebase URIs on absolute URIs without a scheme.
-        if (asprintf(&nmalloced_str, "%s://%s", def_scheme,
-                     effective_str) < 0) {
-            err = hadoop_lerr_alloc(ENOMEM, "uri_parse_abs: OOM");
-            goto done;
-        }
-        free(malloced_str);
-        malloced_str = nmalloced_str;
-        effective_str = malloced_str;
-        uriFreeUriMembersA(uri);
-        state->uri = uri;
-        ret = uriParseUriA(state, effective_str);
-        if (ret) {
-            state->uri = NULL;
-            err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
-                0, "uri_parse: failed to parse '%s' as URI",
-                effective_str);
-            goto done;
-        }
-    }
-    err = NULL;
-done:
-    if (err) {
-        if (state->uri) {
-            uriFreeUriMembersA(state->uri);
-            state->uri = NULL;
-        }
-    }
-    return err;
-}
-
-struct hadoop_err *uri_parse(const char *str, UriParserStateA *state,
-            UriUriA *uri, UriUriA *base_uri)
-{
-    int ret;
-    struct hadoop_err *err = NULL;
-    UriUriA first_uri;
-
-    state->uri = &first_uri;
-    ret = uriParseUriA(state, str);
-    if (ret) {
-        state->uri = NULL;
-        err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
-            0, "uri_parse: failed to parse '%s' as a URI", str);
-        goto done;
-    }
-//    fprintf(stderr, "str=%s, base_path=%s, base_uri->absolutePath=%d\n",
-//            str, base_path, base_uri.absolutePath);
-//        fprintf(stderr, "uriAddBaseUriA base_path=%s, str=%s, ret %d\n", base_path, str, ret); 
-    ret = uriAddBaseUriA(uri, &first_uri, base_uri);
-    if (ret) {
-        err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
-            0, "uri_parse: failed to add base URI");
-        goto done;
-    }
-    uriFreeUriMembersA(&first_uri);
-    state->uri = uri;
-    ret = uriNormalizeSyntaxA(uri);
-    if (ret) {
-        err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
-            0, "uri_parse: failed to normalize URI");
-        goto done;
-    }
-done:
-    if (err) {
-        if (state->uri) {
-            uriFreeUriMembersA(uri);
-            state->uri = NULL;
-        }
-    }
-    return err;
-}
-
 static struct hadoop_err *text_range_to_str(struct UriTextRangeStructA *text,
                                             char **out, const char *def)
 {
@@ -204,29 +97,7 @@ done:
     return NULL;
 }
 
-struct hadoop_err *uri_get_scheme(UriUriA *uri, char **out)
-{
-    struct hadoop_err *err;
-    char *scheme = NULL;
-
-    err = text_range_to_str(&uri->scheme, &scheme, "");
-    if (err)
-        return err;
-    *out = scheme;
-    return NULL;
-}
-
-struct hadoop_err *uri_get_user_info(UriUriA *uri, char **user_info)
-{
-    return text_range_to_str(&uri->userInfo, user_info, "");
-}
-
-struct hadoop_err *uri_get_authority(UriUriA *uri, char **authority)
-{
-    return text_range_to_str(&uri->hostText, authority, "");
-}
-
-struct hadoop_err *uri_get_port(UriUriA *uri, uint16_t *out)
+static struct hadoop_err *uri_get_port(UriUriA *uri, uint16_t *out)
 {
     struct hadoop_err *err;
     char *port_str = NULL;
@@ -245,7 +116,7 @@ struct hadoop_err *uri_get_port(UriUriA *uri, uint16_t *out)
     return NULL;
 }
 
-struct hadoop_err *uri_get_path(UriUriA *uri, char **out)
+static struct hadoop_err *uri_get_path(UriUriA *uri, char **out)
 {
     struct UriPathSegmentStructA *cur;
     size_t i = 0, path_len = 0;
@@ -295,4 +166,142 @@ struct hadoop_err *uri_get_path(UriUriA *uri, char **out)
     return NULL;
 }
 
+struct hadoop_err *hadoop_uri_parse(const char *input,
+                struct hadoop_uri *base, struct hadoop_uri **out, int flags)
+{
+    struct hadoop_err *err;
+    struct hadoop_uri *uri;
+    UriUriA rel;
+    UriParserStateA state;
+    int ret;
+
+    memset(&state, 0, sizeof(state));
+    uri = calloc(1, sizeof(struct hadoop_uri));
+    if (!uri)
+        goto oom;
+    if (flags & H_URI_APPEND_SLASH) {
+        // Append a slash to the URI if needed.
+        size_t input_len = strlen(input);
+        if ((input_len == 0) || (input[input_len - 1] != '/')) {
+            err = dynprintf(&uri->text, "%s/", input);
+            if (err)
+                goto error;
+        }
+    }
+    if (!uri->text) {
+        // Copy the input string.
+        uri->text = strdup(input);
+        if (!uri->text)
+            goto oom;
+    }
+    if (base) {
+        // Parse the supplied text.
+        state.uri = &rel;
+        ret = uriParseUriA(&state, uri->text);
+        if (ret) {
+            state.uri = NULL;
+            err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
+                    0, "uri_parse: failed to parse '%s' as a URI", uri->text);
+            goto error;
+        }
+    //    fprintf(stderr, "str=%s, base_path=%s, base_uri->absolutePath=%d\n",
+    //            str, base_path, base_uri.absolutePath);
+    //        fprintf(stderr, "uriAddBaseUriA base_path=%s, str=%s, ret %d\n", base_path, str, ret);
+        // Add the supplied 'base' URI.
+        ret = uriAddBaseUriA(&uri->uri, &rel, &base->uri);
+        if (ret) {
+            err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
+                        0, "uri_parse: failed to add base URI");
+            goto error;
+        }
+        uriFreeUriMembersA(state.uri);
+        state.uri = &uri->uri;
+    } else {
+        // Parse the supplied text.
+        state.uri = &uri->uri;
+        ret = uriParseUriA(&state, uri->text);
+        if (ret) {
+            state.uri = NULL;
+            err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
+                    0, "uri_parse: failed to parse '%s' as a URI", uri->text);
+            goto error;
+        }
+    }
+    ret = uriNormalizeSyntaxA(&uri->uri);
+    if (ret) {
+        err = hadoop_err_prepend(uri_err_to_hadoop_err(ret),
+                    0, "uri_parse: failed to normalize URI");
+        goto error;
+    }
+    if (flags & H_URI_PARSE_SCHEME) {
+        err = text_range_to_str(&uri->uri.scheme, &uri->scheme, "");
+        if (err)
+            goto error;
+    }
+    if (flags & H_URI_PARSE_USER_INFO) {
+        err = text_range_to_str(&uri->uri.userInfo, &uri->user_info, "");
+        if (err)
+            goto error;
+    }
+    if (flags & H_URI_PARSE_AUTH) {
+        err = text_range_to_str(&uri->uri.hostText, &uri->auth, "");
+        if (err)
+            goto error;
+    }
+    if (flags & H_URI_PARSE_PORT) {
+        err = uri_get_port(&uri->uri, &uri->port);
+        if (err)
+            goto error;
+    }
+    if (flags & H_URI_PARSE_PATH) {
+        err = uri_get_path(&uri->uri, &uri->path);
+        if (err)
+            goto error;
+    }
+    *out = uri;
+    return NULL;
+
+oom:
+    err = hadoop_lerr_alloc(ENOMEM, "uri_parse(%s): OOM", uri->text);
+error:
+    if (state.uri) {
+        uriFreeUriMembersA(state.uri);
+    }
+    if (uri) {
+        free(uri->text);
+        free(uri->scheme);
+        free(uri->user_info);
+        free(uri->auth);
+        free(uri->path);
+        free(uri);
+    }
+    return err;
+}
+
+static const char *check_null(const char *str)
+{
+    return str ? str : "(null)";
+}
+
+struct hadoop_err *hadoop_uri_to_str(const struct hadoop_uri *uri, char **out)
+{
+    return dynprintf(out, "[scheme=%s, user_info=%s, auth=%s, "
+                     "port=%d, path=%s]",
+              check_null(uri->scheme), check_null(uri->user_info),
+              check_null(uri->auth), uri->port, check_null(uri->path));
+}
+
+void hadoop_uri_free(struct hadoop_uri *uri)
+{
+    if (!uri)
+        return;
+    uriFreeUriMembersA(&uri->uri);
+    free(uri->text);
+    free(uri->scheme);
+    free(uri->user_info);
+    free(uri->auth);
+    free(uri->path);
+    free(uri);
+}
+
 // vim: ts=4:sw=4:tw=79:et

+ 48 - 66
hadoop-native-core/src/main/native/common/uri.h

@@ -19,89 +19,71 @@
 #ifndef HADOOP_CORE_COMMON_URI
 #define HADOOP_CORE_COMMON_URI
 
+#include <inttypes.h>
 #include <uriparser/Uri.h>
 
-/**
- * Parse an absolute URI.
- *
- * @param str           The string to parse.  If there is not a slash at the
- *                          end, one will be added.
- * @param state         (inout) The URI parser state to use.
- *                          On success, state->uri will be set to a non-NULL
- *                          value.
- * @param uri           (out param) The URI object to fill.
- * @param def_scheme    The default scheme to add if there is no scheme.
- *
- * @return              NULL on success; the URI parsing problem otherwise.
- */
-struct hadoop_err *uri_parse_abs(const char *str, UriParserStateA *state,
-            UriUriA *uri, const char *def_scheme);
+struct hadoop_uri {
+    /** The text we parsed to create this URI. */
+    char *text;
 
-/**
- * Parse a relative or absolute URI.
- *
- * @param str           The string to parse.
- * @param state         (inout) The URI parser state to use.
- *                          On success, state->uri will be set to a non-NULL
- *                          value.
- * @param uri           (out param) The URI object to fill.
- *
- * @return              NULL on success; the URI parsing problem otherwise.
- */
-struct hadoop_err *uri_parse(const char *str, UriParserStateA *state,
-            UriUriA *uri, UriUriA *base_uri);
+    /** URI structure. */
+    UriUriA uri;
 
-/**
- * Get the scheme of a URI.
- *
- * We disallow schemes with non-ASCII characters.
- *
- * @param uri           The Uri object.
- * @param scheme        (out param) the scheme.
- *
- * @return              NULL on success; the URI parsing problem otherwise.
- */
-struct hadoop_err *uri_get_scheme(UriUriA *uri, char **scheme);
+    /** URI Scheme or NULL. */
+    char *scheme;
 
-/**
- * Get the user_info of a URI.
- *
- * @param uri           The Uri object.
- * @param user_info     (out param) the user_info.
- *
- * @return              NULL on success; the URI parsing problem otherwise.
- */
-struct hadoop_err *uri_get_user_info(UriUriA *uri, char **user_info);
+    /** User info or NULL. */
+    char *user_info;
+
+    /** Authority or NULL. */
+    char *auth;
+
+    /** URI Port. */
+    uint16_t port;
+
+    /** URI path. */
+    char *path;
+};
+
+#define H_URI_APPEND_SLASH              0x01
+#define H_URI_PARSE_SCHEME              0x02
+#define H_URI_PARSE_USER_INFO           0x04
+#define H_URI_PARSE_AUTH                0x08
+#define H_URI_PARSE_PORT                0x10
+#define H_URI_PARSE_PATH                0x20
+#define H_URI_PARSE_ALL \
+        (H_URI_PARSE_SCHEME | \
+        H_URI_PARSE_USER_INFO | \
+        H_URI_PARSE_AUTH | \
+        H_URI_PARSE_PORT | \
+        H_URI_PARSE_PATH)
 
 /**
- * Get the authority of a URI.
+ * Parse a Hadoop URI.
  *
- * @param uri           The Uri object.
- * @param authority     (out param) the authority.
- *
- * @return              NULL on success; the URI parsing problem otherwise.
+ * @param text          The text to parse.
+ * @param base          If non-NULL, the URI to use as a base if the URI we're
+ *                          parsing is relative.
+ * @param out           (out param) The URI.
+ * @param flags         Parse flags.
  */
-struct hadoop_err *uri_get_authority(UriUriA *uri, char **authority);
+struct hadoop_err *hadoop_uri_parse(const char *text,
+                struct hadoop_uri *base, struct hadoop_uri **out, int flags);
 
 /**
- * Get the port of a URI.
- *
- * @param uri           The Uri object.
- * @param port          (out param) the port, or 0 if there was no port.
+ * Dynamically allocate a string representing the hadoop_uri.
  *
- * @return              NULL on success; the URI parsing problem otherwise.
+ * @param uri           The URI.
+ * @param out           (out param)
  */
-struct hadoop_err *uri_get_port(UriUriA *uri, uint16_t *port);
+struct hadoop_err *hadoop_uri_to_str(const struct hadoop_uri *uri, char **out);
 
 /**
- * Get the path of a URI.
- *
- * @param uri       The Uri object.
- * @param path      (out param) the path.
+ * Free a Hadoop URI.
  *
- * @return          NULL on success; the URI parsing problem otherwise.
+ * @param uri           The URI to free.
  */
-struct hadoop_err *uri_get_path(UriUriA *uri, char **path);
+void hadoop_uri_free(struct hadoop_uri *uri);
 
 #endif
 

+ 2 - 3
hadoop-native-core/src/main/native/fs/common.c

@@ -42,7 +42,7 @@ void release_file_info_entry(hdfsFileInfo *hdfsFileInfo)
 int hadoopfs_errno_and_retcode(struct hadoop_err *err)
 {
     if (err) {
-        fputs(hadoop_err_msg(err), stderr);
+        fprintf(stderr, "%s\n", hadoop_err_msg(err));
         errno = hadoop_err_code(err);
         hadoop_err_free(err);
         return -1;
@@ -53,8 +53,7 @@ int hadoopfs_errno_and_retcode(struct hadoop_err *err)
 void *hadoopfs_errno_and_retptr(struct hadoop_err *err, void *ptr)
 {
     if (err) {
-        fputs(hadoop_err_msg(err), stderr);
-        errno = hadoop_err_code(err);
+        fprintf(stderr, "%s\n", hadoop_err_msg(err));
         hadoop_err_free(err);
         return NULL;
     }

+ 43 - 52
hadoop-native-core/src/main/native/fs/fs.c

@@ -190,82 +190,72 @@ static struct hadoop_err *hdfs_builder_parse_conn_uri(
                                     struct hdfsBuilder *hdfs_bld)
 {
     int ret;
-    uint16_t port;
-    const char *uri_str;
-    char *malloced_uri_str = NULL;
-    UriParserStateA uri_state;
-    UriUriA uri;
+    char *uri_str = NULL, *uri_dbg = NULL;
     struct hadoop_err *err = NULL;
 
-    memset(&uri_state, 0, sizeof(uri_state));
-    uri_str = hdfs_bld->nn;
-    if (uri_str) {
-        // If the connection URI was set via hdfsBuilderSetNameNode, it may
-        // not be a real URI, but just a <hostname>:<port> pair.  This won't
-        // parse correctly unless we add a hdfs:// scheme in front of it.
-        if ((!index(uri_str, '/')) && (index(uri_str, ':'))) {
-            if (asprintf(&malloced_uri_str, "hdfs://%s", uri_str) < 0) {
-                malloced_uri_str = NULL;
-                err = hadoop_lerr_alloc(ENOMEM, "uri_parse: OOM "
-                                        "adding default scheme");
+    if (hdfs_bld->nn) {
+        if ((!index(hdfs_bld->nn, '/')) && (index(hdfs_bld->nn, ':'))) {
+            // If the connection URI was set via hdfsBuilderSetNameNode, it may
+            // not be a real URI, but just a <hostname>:<port> pair.  This won't
+            // parse correctly unless we add a hdfs:// scheme in front of it.
+            err = dynprintf(&uri_str, "hdfs://%s", hdfs_bld->nn);
+            if (err)
+                goto done;
+        } else {
+            uri_str = strdup(hdfs_bld->nn);
+            if (!uri_str) {
+                err = hadoop_lerr_alloc(ENOMEM, "hdfs_builder_parse_conn_uri: OOM");
                 goto done;
             }
-            uri_str = malloced_uri_str;
         }
     } else {
-        uri_str = hconf_get(hdfs_bld->hconf, "fs.defaultFS");
+        const char *default_fs = hconf_get(hdfs_bld->hconf, "fs.defaultFS");
+        if (!default_fs) {
+            default_fs = "file:///";
+        }
+        uri_str = strdup(default_fs);
         if (!uri_str) {
-            uri_str = "file:///";
+            err = hadoop_lerr_alloc(ENOMEM, "hdfs_builder_parse_conn_uri: OOM");
+            goto done;
         }
     }
-    err = uri_parse_abs(uri_str, &uri_state, &uri, DEFAULT_SCHEME);
-    if (err)
-        goto done;
-    err = uri_get_scheme(&uri, &hdfs_bld->uri_scheme);
-    if (err)
-        goto done; 
-    // Get the user_info.  We default to the userName passed in to the hdfs
-    // builder.
-    err = uri_get_user_info(&uri, &hdfs_bld->uri_user_info);
+    err = hadoop_uri_parse(uri_str, NULL, &hdfs_bld->uri,
+                     H_URI_APPEND_SLASH | H_URI_PARSE_ALL);
     if (err)
         goto done;
-    if (hdfs_bld->uri_user_info[0] == '\0') {
+    if (hdfs_bld->uri->user_info[0] == '\0') {
         // If we still don't have an authority, fill in the authority from the
         // current user name.
-        free(hdfs_bld->uri_user_info);
-        hdfs_bld->uri_user_info = NULL;
-        ret = geteuid_string(&hdfs_bld->uri_user_info);
+        free(hdfs_bld->uri->user_info);
+        hdfs_bld->uri->user_info = NULL;
+        ret = geteuid_string(&hdfs_bld->uri->user_info);
         if (ret) {
             err = hadoop_lerr_alloc(ret, "geteuid_string failed: error "
                                     "%d", ret);
             goto done;
         }
     }
-    // Get the authority, which we typically treat as a hostname.
-    err = uri_get_authority(&uri, &hdfs_bld->uri_authority);
+    err = hadoop_uri_to_str(hdfs_bld->uri, &uri_dbg);
     if (err)
         goto done;
-    // Get the port, or 0.
-    err = uri_get_port(&uri, &port);
-    if (err)
-        goto done;
-    fprintf(stderr, "hdfs_builder_parse_conn_uri: "
-            "uri_scheme=%s, uri_user_info=%s, "
-            "uri_authority=%s, port=%d\n",
-            hdfs_bld->uri_scheme, hdfs_bld->uri_user_info,
-            hdfs_bld->uri_authority, port);
-    // The URI's port overrides the port supplied via
-    // hdfsBuilderSetNameNodePort.
-    if (port) {
-        hdfs_bld->port = port;
+    fprintf(stderr, "hdfs_builder_parse_conn_uri: %s\n", uri_dbg);
+    if (hdfs_bld->port == 0) {
+        hdfs_bld->port = hdfs_bld->uri->port;
+    } else {
+        if (hdfs_bld->port != hdfs_bld->uri->port) {
+            err = hadoop_lerr_alloc(EINVAL, "The connection URI specified "
+                    "port %d, but hdfsBuilderSetNameNodePort specified port "
+                    "%d.  Please only specify the port once, preferrably in "
+                    "the URI.", hdfs_bld->uri->port, hdfs_bld->port);
+            goto done;
+        }
+        hdfs_bld->uri->port = hdfs_bld->port;
     }
     err = NULL;
 
 done:
-    free(malloced_uri_str);
-    if (uri_state.uri) {
-        uriFreeUriMembersA(&uri);
-    }
+    free(uri_dbg);
+    free(uri_str);
     return err;
 }
 
@@ -296,7 +286,7 @@ hdfsFS hdfsBuilderConnect(struct hdfsBuilder *bld)
         goto done;
 
     // Find out the native filesystems we should use for this URI.
-    if (asprintf(&fs_list_key, "%s.native.handler.", bld->uri_scheme) < 0) {
+    if (asprintf(&fs_list_key, "%s.native.handler.", bld->uri->scheme) < 0) {
         fs_list_key = NULL;
         err = hadoop_lerr_alloc(ENOMEM, "hdfsBuilderConnect: OOM");
         goto done;
@@ -385,6 +375,7 @@ void hdfsFreeBuilder(struct hdfsBuilder *bld)
         free(cur);
         cur = next;
     }
+    hadoop_uri_free(bld->uri);
     hconf_free(bld->hconf);
     free(bld);
 }

+ 2 - 4
hadoop-native-core/src/main/native/fs/fs.h

@@ -24,6 +24,7 @@
 #include <inttypes.h>
 
 struct hadoop_err;
+struct hadoop_uri;
 struct hconf;
 
 /**
@@ -122,10 +123,7 @@ struct hdfsBuilder {
     const char *userName;
     struct hdfsBuilderConfOpt *opts;
     struct hconf *hconf;
-    char *uri_scheme;
-    char *uri_user_info;
-    char *uri_authority;
-    uint16_t uri_port;
+    struct hadoop_uri *uri;
 };
 
 /**

+ 80 - 116
hadoop-native-core/src/main/native/ndfs/ndfs.c

@@ -33,7 +33,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <strings.h>
-#include <uriparser/Uri.h>
 #include <uv.h>
 
 #define DEFAULT_NN_PORT 8020
@@ -67,14 +66,17 @@ struct native_fs {
     /** The default block size obtained from getServerDefaults. */
     int64_t default_block_size;
 
-    /** User name to use for RPCs.  Immutable. */
-    char *user_name;
+    /** Prefix to use when building URLs. */
+    char *url_prefix;
+
+    /** URI that was used when connecting. */
+    struct hadoop_uri *conn_uri;
 
     /**
      * A dynamically allocated working directory which will be prepended to
      * all relative paths.
      */
-    UriUriA *working_uri; 
+    struct hadoop_uri *working_uri;
 
     /** Lock which protects the working_uri. */
     uv_mutex_t working_uri_lock;
@@ -135,10 +137,12 @@ static hdfsFileInfo *ndfs_get_path_info(hdfsFS bfs, const char* uri);
 static struct hadoop_err *ndfs_connect_setup_conf(struct native_fs *fs,
                                                   struct hconf *hconf);
 
+static void ndfs_free(struct native_fs *fs);
+
 static void ndfs_nn_proxy_init(struct native_fs *fs, struct hrpc_proxy *proxy)
 {
     hrpc_proxy_init(proxy, fs->msgr, &fs->nn_addr, CLIENT_NN_PROTOCOL,
-                    fs->user_name);
+                    fs->conn_uri->user_info);
 }
 
 /**
@@ -153,47 +157,34 @@ static void ndfs_nn_proxy_init(struct native_fs *fs, struct hrpc_proxy *proxy)
 static struct hadoop_err *build_path(struct native_fs *fs, const char *uri_str,
                                      char **out)
 {
-    char *path = NULL;
     struct hadoop_err *err = NULL;
-    UriParserStateA uri_state;
-    UriUriA uri;
-
-    memset(&uri_state, 0, sizeof(uri_state));
+    struct hadoop_uri *uri = NULL;
 
     uv_mutex_lock(&fs->working_uri_lock);
-    err = uri_parse(uri_str, &uri_state, &uri, fs->working_uri);
+    err = hadoop_uri_parse(uri_str, fs->working_uri, &uri, H_URI_PARSE_PATH);
     if (err)
         goto done;
     // TODO: check URI scheme and user against saved values?
-    err = uri_get_path(&uri, &path);
-    if (err)
+    if (uri->path[0]) {
+        *out = strdup(uri->path);
+    } else {
+        // As a special case, when the URI given has an empty path, we assume that
+        // we want the current working directory.  This is to allow things like
+        // hdfs://mynamenode to map to the current working directory, as they do in
+        // Hadoop.  Note that this is different than hdfs://mynamenode/ (note the
+        // trailing slash) which maps to the root directory.
+        *out = strdup(fs->working_uri->path);
+    }
+    if (!*out) {
+        err = hadoop_lerr_alloc(ENOMEM, "build_path: out of memory.");
         goto done;
-    // As a special case, when the URI given has an empty path, we assume that
-    // we want the current working directory.  This is to allow things like
-    // hdfs://mynamenode to map to the current working directory, as they do in
-    // Hadoop.  Note that this is different than hdfs://mynamenode/ (note the
-    // trailing slash) which maps to the root directory.
-    if (!path[0]) {
-        free(path);
-        path = NULL;
-        err = uri_get_path(fs->working_uri, &path);
-        if (err) {
-            goto done;
-        }
     }
     err = NULL;
 
 done:
     uv_mutex_unlock(&fs->working_uri_lock);
-    if (uri_state.uri) {
-        uriFreeUriMembersA(&uri);
-    }
-    if (err) {
-        free(path);
-        return err;
-    }
-    *out = path;
-    return NULL;
+    hadoop_uri_free(uri);
+    return err;
 }
 
 static int ndfs_file_is_open_for_read(hdfsFile bfile)
@@ -307,8 +298,8 @@ done:
     return err;
 }
 
-static struct hadoop_err *get_namenode_addr(const struct hdfsBuilder *hdfs_bld,
-            struct sockaddr_in *nn_addr)
+static struct hadoop_err *get_namenode_addr(const struct hadoop_uri *conn_uri,
+        const struct hdfsBuilder *hdfs_bld, struct sockaddr_in *nn_addr)
 {
     const char *nameservice_id;
     const char *rpc_addr;
@@ -322,7 +313,7 @@ static struct hadoop_err *get_namenode_addr(const struct hdfsBuilder *hdfs_bld,
     if (rpc_addr) {
         return parse_rpc_addr(rpc_addr, nn_addr, hdfs_bld->port);
     }
-    return parse_rpc_addr(hdfs_bld->uri_authority, nn_addr, hdfs_bld->port);
+    return parse_rpc_addr(conn_uri->auth, nn_addr, hdfs_bld->port);
 }
 
 struct hadoop_err *ndfs_connect(struct hdfsBuilder *hdfs_bld,
@@ -332,9 +323,8 @@ struct hadoop_err *ndfs_connect(struct hdfsBuilder *hdfs_bld,
     struct native_fs *fs = NULL;
     struct hrpc_messenger_builder *msgr_bld;
     struct ndfs_server_defaults defaults;
-    int working_dir_lock_created = 0;
+    int used_port;
     char *working_dir = NULL;
-    UriParserStateA uri_state;
 
     fs = calloc(1, sizeof(*fs));
     if (!fs) {
@@ -343,11 +333,23 @@ struct hadoop_err *ndfs_connect(struct hdfsBuilder *hdfs_bld,
         goto done;
     }
     fs->base.ty = HADOOP_FS_TY_NDFS;
-    fs->user_name = strdup(hdfs_bld->uri_user_info); 
-    if (!fs->user_name) {
-        err = hadoop_lerr_alloc(ENOMEM, "failed to allocate space "
-                                "for the user name.");
-        goto done;
+    fs->conn_uri = hdfs_bld->uri;
+    hdfs_bld->uri = NULL;
+    // Calculate our url_prefix.  We'll need this when spitting out URIs from
+    // listStatus and getFileInfo.  We don't include the port in this URL
+    // prefix unless it is non-standard.
+    used_port = ntohs(fs->nn_addr.sin_port);
+    if (used_port == DEFAULT_NN_PORT) {
+        err = dynprintf(&fs->url_prefix, "%s://%s",
+                     fs->conn_uri->scheme, fs->conn_uri->auth);
+        if (err)
+            goto done;
+    } else {
+        err = dynprintf(&fs->url_prefix, "%s://%s:%d",
+                     fs->conn_uri->scheme, fs->conn_uri->auth,
+                     used_port);
+        if (err)
+            goto done;
     }
     msgr_bld = hrpc_messenger_builder_alloc();
     if (!msgr_bld) {
@@ -355,38 +357,31 @@ struct hadoop_err *ndfs_connect(struct hdfsBuilder *hdfs_bld,
                                 "for a messenger builder.");
         goto done;
     }
-    err = get_namenode_addr(hdfs_bld, &fs->nn_addr);
+    err = get_namenode_addr(fs->conn_uri, hdfs_bld, &fs->nn_addr);
     if (err)
         goto done;
     err = hrpc_messenger_create(msgr_bld, &fs->msgr);
     if (err)
         goto done;
     // Get the default working directory
-    if (asprintf(&working_dir, "%s:///user/%s/",
-                 hdfs_bld->uri_scheme, hdfs_bld->uri_user_info) < 0) {
-        working_dir = NULL;
-        err = hadoop_lerr_alloc(ENOMEM, "ndfs_connect: OOM allocating "
-                                "working_dir");
-        goto done;
-    }
-    fs->working_uri = calloc(1, sizeof(*(fs->working_uri)));
-    if (!fs->working_uri) {
-        err = hadoop_lerr_alloc(ENOMEM, "ndfs_connect: OOM allocating "
-                                "fs->working_uri");
+    if (uv_mutex_init(&fs->working_uri_lock) < 0) {
+        err = hadoop_lerr_alloc(ENOMEM, "failed to create a mutex.");
         goto done;
     }
-    err = uri_parse_abs(working_dir, &uri_state, fs->working_uri,
-                        hdfs_bld->uri_scheme);
+    err = dynprintf(&working_dir, "%s:///user/%s/",
+                 fs->conn_uri->scheme, fs->conn_uri->user_info);
     if (err) {
-        free(fs->working_uri);
-        fs->working_uri = NULL;
+        uv_mutex_destroy(&fs->working_uri_lock);
         goto done;
     }
-    if (uv_mutex_init(&fs->working_uri_lock) < 0) {
-        err = hadoop_lerr_alloc(ENOMEM, "failed to create a mutex.");
+    err = hadoop_uri_parse(working_dir, NULL, &fs->working_uri,
+                           H_URI_PARSE_ALL | H_URI_APPEND_SLASH);
+    if (err) {
+        uv_mutex_destroy(&fs->working_uri_lock);
+        err = hadoop_err_prepend(err, 0, "ndfs_connect: error parsing "
+                                "working directory");
         goto done;
     }
-    working_dir_lock_created = 1;
     err = ndfs_connect_setup_conf(fs, hdfs_bld->hconf);
     if (err)
         goto done;
@@ -405,17 +400,7 @@ struct hadoop_err *ndfs_connect(struct hdfsBuilder *hdfs_bld,
 done:
     free(working_dir);
     if (err) {
-        if (fs) {
-            free(fs->user_name);
-            if (fs->working_uri) {
-                uriFreeUriMembersA(fs->working_uri);
-                free(fs->working_uri);
-            }
-            if (working_dir_lock_created) {
-                uv_mutex_destroy(&fs->working_uri_lock);
-            }
-            free(fs);
-        }
+        ndfs_free(fs);
         return err; 
     }
     *out = (struct hdfs_internal *)fs;
@@ -469,17 +454,24 @@ static struct hadoop_err *ndfs_connect_setup_conf(struct native_fs *fs,
     return NULL;
 }
 
-static int ndfs_disconnect(hdfsFS bfs)
+static void ndfs_free(struct native_fs *fs)
 {
-    struct native_fs *fs = (struct native_fs*)bfs;
-
     hrpc_messenger_shutdown(fs->msgr);
     hrpc_messenger_free(fs->msgr);
-    free(fs->user_name);
-    uriFreeUriMembersA(fs->working_uri);
-    free(fs->working_uri);
-    uv_mutex_destroy(&fs->working_uri_lock);
+    free(fs->url_prefix);
+    hadoop_uri_free(fs->conn_uri);
+    if (fs->working_uri) {
+        hadoop_uri_free(fs->working_uri);
+        uv_mutex_destroy(&fs->working_uri_lock);
+    }
     free(fs);
+}
+
+static int ndfs_disconnect(hdfsFS bfs)
+{
+    struct native_fs *fs = (struct native_fs*)bfs;
+
+    ndfs_free(fs);
     return 0;
 }
 
@@ -717,16 +709,9 @@ static char* ndfs_get_working_directory(hdfsFS bfs, char *buffer,
     size_t len;
     struct native_fs *fs = (struct native_fs *)bfs;
     struct hadoop_err *err = NULL;
-    char *working_path = NULL;
 
     uv_mutex_lock(&fs->working_uri_lock);
-    err = uri_get_path(fs->working_uri, &working_path);
-    if (err) {
-        err = hadoop_err_prepend(err, 0, "ndfs_get_working_directory: failed "
-                                 "to get the path of the working_uri.");
-        goto done;
-    }
-    len = strlen(working_path);
+    len = strlen(fs->conn_uri->path);
     if (len + 1 > bufferSize) {
         err = hadoop_lerr_alloc(ENAMETOOLONG, "ndfs_get_working_directory: "
                 "the buffer supplied was only %zd bytes, but we would need "
@@ -734,52 +719,31 @@ static char* ndfs_get_working_directory(hdfsFS bfs, char *buffer,
                 bufferSize, len + 1);
         goto done;
     }
-    strcpy(buffer, working_path);
+    strcpy(buffer, fs->conn_uri->path);
 done:
     uv_mutex_unlock(&fs->working_uri_lock);
-    free(working_path);
     return hadoopfs_errno_and_retptr(err, buffer);
 }
 
 static int ndfs_set_working_directory(hdfsFS bfs, const char* uri_str)
 {
     struct native_fs *fs = (struct native_fs *)bfs;
-    char *path = NULL;
-    char *scheme = NULL;
     struct hadoop_err *err = NULL;
-    UriParserStateA uri_state;
-    UriUriA *uri = NULL;
+    struct hadoop_uri *uri = NULL;
 
     uv_mutex_lock(&fs->working_uri_lock);
-    uri = calloc(1, sizeof(*uri));
-    if (!uri) {
-        err = hadoop_lerr_alloc(ENOMEM, "ndfs_set_working_directory: OOM");
-        goto done;
-    }
-    err = uri_get_scheme(fs->working_uri, &scheme);
+    err = hadoop_uri_parse(uri_str, fs->working_uri, &uri,
+            H_URI_PARSE_ALL | H_URI_APPEND_SLASH);
     if (err) {
-        err = hadoop_err_prepend(err, ENOMEM, "ndfs_set_working_directory: "
-                            "failed to get scheme of current working_uri");
+        err = hadoop_err_prepend(err, 0, "ndfs_set_working_directory: ");
         goto done;
     }
-    err = build_path(fs, uri_str, &path);
-    if (err)
-        goto done;
-    err = uri_parse_abs(path, &uri_state, uri, scheme);
-    if (err)
-        goto done;
-    uriFreeUriMembersA(fs->working_uri);
-    free(fs->working_uri);
+    hadoop_uri_free(fs->working_uri);
     fs->working_uri = uri;
     err = NULL;
 
 done:
-    if (err) {
-        free(uri);
-    }
     uv_mutex_unlock(&fs->working_uri_lock);
-    free(scheme);
-    free(path);
     return hadoopfs_errno_and_retcode(err);
 }